diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 0000000000..e596a5d26f --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,101 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json +# https://docs.coderabbit.ai/getting-started/configure-coderabbit/ +# Validator https://docs.coderabbit.ai/configuration/yaml-validator#yaml-validator +# In PR, comment "@coderabbitai configuration" to get the full config including defaults +# Set the language for reviews by using the corresponding ISO language code. +# Default: "en-US" +language: "en-US" +# Settings related to reviews. +# Default: {} +reviews: + # Set the profile for reviews. Assertive profile yields more feedback, that may be considered nitpicky. + # Options: chill, assertive + # Default: "chill" + profile: chill + # Add this keyword in the PR/MR title to auto-generate the title. + # Default: "@coderabbitai" + auto_title_placeholder: '@coderabbitai title' + # Auto Title Instructions - Custom instructions for auto-generating the PR/MR title. + # Default: "" + auto_title_instructions: 'Format: "<category>: <title>". Category must be one of: feat, fix, docs, style, refactor, perf, test, build, ci, chore, revert, cp. The category must be followed by a colon. Title should be concise (<= 80 chars). 
Example: "feat: Add logit_bias support".' # current: '' + # Set the commit status to 'pending' when the review is in progress and 'success' when it is complete. + # Default: true + commit_status: false + # Generate walkthrough in a markdown collapsible section. + # Default: false + collapse_walkthrough: true + # Generate an assessment of how well the changes address the linked issues in the walkthrough. + # Default: true + assess_linked_issues: true + # Include possibly related issues in the walkthrough. + # Default: true + related_issues: true + # Related PRs - Include possibly related pull requests in the walkthrough. + # Default: true + related_prs: true + # Suggest labels based on the changes in the pull request in the walkthrough. + # Default: true + suggested_labels: true + # Suggest reviewers based on the changes in the pull request in the walkthrough. + # Default: true + suggested_reviewers: true + # Generate a poem in the walkthrough comment. + # Default: true + poem: false # current: true + # Post review details on each review. Additionally, post a review status when a review is skipped in certain cases. + # Default: true + review_status: false # current: true + # Configuration for pre merge checks + # Default: {} + pre_merge_checks: + # Custom Pre-merge Checks - Add unique checks to enforce your team's standards before merging a pull request. Each check must have a unique name (up to 50 characters) and clear instructions (up to 10000 characters). Use these to automatically verify coding, security, documentation, or business rules and maintain code quality. + # Default: [] + custom_checks: + - name: "Test Results for Major Changes" + mode: "warning" # or "error" to block merges + instructions: | + If this PR contains major changes (such as new features, breaking changes, or significant refactoring), verify that the PR description includes test results or testing information. 
+ If a change could affect numerics or convergence, the PR description should include information demonstrating that there is no regression. + If a change could affect performance, the PR description should include before-and-after performance numbers, as well as the configuration and context in which they apply. + Pass if test results are documented or if the changes are minor. + auto_review: + # Configuration for auto review + # Default: {} + # Automatic Incremental Review - Automatic incremental code review on each push + # Default: true + auto_incremental_review: false # current: true + # Review draft PRs/MRs. + # Default: false + drafts: false + # Base branches (other than the default branch) to review. Accepts regex patterns. Use '.*' to match all branches. + # Default: [] + base_branches: ["main", "r[0-9].*"] # current: [] +# Configuration for knowledge base +# Default: {} +knowledge_base: + code_guidelines: + # CodeRabbit will analyse and learn from your organization's code guidelines, which you can mention in the file patterns section. These guidelines will then be used to conduct thorough code reviews. + # Default: {} + enabled: true + # Enabled - Enable CodeRabbit to enforce your organization's coding standards during reviews. + # Default: true + filePatterns: # current: [] + # File Patterns - Specify files for your coding guideline documents in this section. CodeRabbit will scan these files to understand your team's standards and apply them during code reviews. Multiple files supported. File names are case-sensitive. Common files like: (**/.cursorrules, .github/copilot-instructions.md, .github/instructions/*.instructions.md, **/CLAUDE.md, **/GEMINI.md, **/.cursor/rules/*, **/.windsurfrules, **/.clinerules/*, **/.rules/*, **/AGENT.md, **/AGENTS.md) are included by default. 
+ # Default: [] + - "**/CODING_GUIDELINES.md" + - "**/.cursor/rules/*" diff --git a/.dockerignore b/.dockerignore index a5aa48cb04..8e4e560ff5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,6 +1,8 @@ # Adding to .gitignore helps reduce the size of your working_dir -.git +# Note: removing .git from .dockerignore since it is valuable to have the git history to +# know where this container was built +# .git *.out *.log *.tar diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index bdb8407d00..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: Bug report -about: Create a report to help us improve -title: '' -labels: bug -assignees: '' - ---- - -**Describe the bug** - -A clear and concise description of what the bug is. - -**Steps/Code to reproduce bug** - -Please list *minimal* steps or code snippet for us to be able to reproduce the bug. - -A helpful guide on on how to craft a minimal bug report http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports. - - -**Expected behavior** - -A clear and concise description of what you expected to happen. - -**Environment overview (please complete the following information)** - - - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider - AWS, Azure, GCP, Collab)] - - Method of install: [pip install or from source]. Please specify exact commands you used to install. - - If method of install is [Docker], provide `docker pull` & `docker run` commands used - -**Environment details** - -If NVIDIA docker image is used you don't need to specify these. -Otherwise, please provide: -- OS version -- PyTorch version -- Python version - -**Additional context** - -Add any other context about the problem here. 
-Example: GPU model diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index 3a13116134..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -name: Feature request -about: Suggest an idea for this project -title: '' -labels: feature request -assignees: '' - ---- - -**Is your feature request related to a problem? Please describe.** - -A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] - -**Describe the solution you'd like** - -A clear and concise description of what you want to happen. -Provide a code snippet on how new APIs/changes would be used by others. - -**Describe alternatives you've considered** - -A clear and concise description of any alternative solutions or features you've considered. - -**Additional context** - -Add any other context or screenshots about the feature request here \ No newline at end of file diff --git a/.github/RENOVATE_SETUP.md b/.github/RENOVATE_SETUP.md new file mode 100644 index 0000000000..225360c93a --- /dev/null +++ b/.github/RENOVATE_SETUP.md @@ -0,0 +1,180 @@ +# Renovate Setup Documentation + +This repository uses [Renovate](https://docs.renovatebot.com/) to automatically update dependencies, including git submodules and Python packages managed in `pyproject.toml`. + +## What Renovate Does + +Renovate automatically: +1. **Updates git submodules** by tracking the configured branches +2. **Updates a small allowlist of Python dependencies** in `pyproject.toml`: + - `vllm`, `torch`, and `ray` for the core training stack + - `transformer-engine` and `flash-attn` for xformers compatibility + - `transformers` so we can track upstream releases + - _Everything else is frozen unless explicitly requested._ +3. **Syncs `3rdparty/*/setup.py` files** with their corresponding submodule dependencies +4. **Regenerates `uv.lock`** after dependency updates +5. 
**Pre-clones git submodules with full history** so Renovate can checkout new commits (works around `shallow=true` in `.gitmodules`) +6. **Creates a single PR** that automatically triggers the full CI pipeline (`cicd-main.yml`) + +## Setup Requirements + +You need to set up authentication for Renovate. Choose one of the following options: + +### Option 1: Personal Access Token (PAT) - Quick Start + +**This is the easiest way to get started:** + +1. Create a GitHub Personal Access Token (PAT): + - Go to GitHub Settings → Developer settings → Personal access tokens → Tokens (classic) + - Click "Generate new token (classic)" + - Give it a descriptive name (e.g., "Renovate Bot") + - Select scopes: + - ✅ `repo` (Full control of private repositories) + - ✅ `workflow` (Update GitHub Action workflows - required for github-actions manager) + - Click "Generate token" and copy it + +2. Add the token as a repository secret: + - Go to your repository → Settings → Secrets and variables → Actions + - Click "New repository secret" + - Name: `RENOVATE_TOKEN` + - Value: Paste your PAT + - Click "Add secret" + +3. You're done! The workflow will use the PAT automatically. + +### Option 2: GitHub App (Recommended for Organizations) + +**Better for rate limits and security, but requires more setup:** + +1. Create a GitHub App: + - Go to Organization Settings → Developer settings → GitHub Apps → New GitHub App + - Or use an existing Renovate GitHub App + +2. Configure the app with these permissions: + - Repository permissions: + - Contents: Read & Write + - Pull requests: Read & Write + - Workflows: Read & Write (if using github-actions manager) + - Metadata: Read-only + +3. Install the app on your repository + +4. Add these secrets to your repository: + - `RENOVATE_APP_ID`: The app ID (found on the app's settings page) + - `RENOVATE_APP_PRIVATE_KEY`: The app's private key (PEM format) + +5. The workflow will automatically detect and use the GitHub App token + +### 2. 
Grant Workflow Permissions + +Ensure the Renovate workflow has permission to: +- Create and update pull requests +- Read and write to the repository +- Access secrets + +This can be configured in: `Settings` → `Actions` → `General` → `Workflow permissions` + +## Configuration Files + +### `.github/renovate.json` +Main configuration file that defines: +- Update schedule (daily during business hours PST) +- Package grouping rules +- Branch naming conventions +- PR labels (`dependencies`, `CI:L2`) + +### `.github/workflows/renovate.yml` +GitHub Actions workflow that: +- Runs daily at 9 AM UTC (1 AM PST / 2 AM PDT) +- Can be manually triggered with `workflow_dispatch` +- Sets up the environment (Python, uv) +- Executes Renovate with proper credentials + +### `.github/scripts/sync_submodule_dependencies.py` +Python script that: +- Reads dependencies from `3rdparty/*/pyproject.toml` files in submodules +- Updates `CACHED_DEPENDENCIES` in corresponding `setup.py` files +- Ensures consistency between submodule requirements and wrapper packages + +### `.github/scripts/renovate_post_update.sh` +Bash script that runs after Renovate updates dependencies: +1. Syncs submodule dependencies to setup.py files +2. Runs `uv lock` to regenerate the lock file +3. Stages changes for commit + +## Manual Workflow Trigger + +You can manually trigger Renovate at any time: + +1. Go to `Actions` → `Renovate` in GitHub +2. Click `Run workflow` +3. Optional parameters: + - **Log level**: Set to `debug` for verbose output + - **Dry run**: Enable to preview changes without creating PRs + +## Update Strategy + +Renovate now produces **one consolidated PR at a time**: + +| Branch prefix | Contents | Notes | +|---------------|----------|-------| +| `renovate/allowlist-…` | Git submodules, Docker/GitHub Action updates, and the allowlisted Python packages above | Runs on the configured weekday schedule; no other dependencies are touched until explicitly re-enabled. 
Renovate's built-in vulnerability PRs are disabled so everything funnels through this branch. | + +## Debug vs. Production Settings + +- `prHourlyLimit` is currently `0` **only while debugging** so Renovate can recreate PRs immediately. Set it back to `1` once we're satisfied with the configuration to avoid noisy PR bursts. +- `prConcurrentLimit` stays at `1` to preserve the "one PR at a time" contract; raise it temporarily if you ever need parallel testing. + +## CI Integration + +When Renovate creates a PR: +1. The PR is automatically labeled with `CI:L2` to trigger full CI testing +2. `cicd-main.yml` runs the complete test suite +3. All L2 tests must pass before the PR can be merged +4. The lock file and setup.py changes are included in the PR + +## Troubleshooting + +### Renovate workflow fails +- Check that secrets `RENOVATE_APP_ID` and `RENOVATE_APP_PRIVATE_KEY` are set +- Verify the GitHub App is installed on the repository +- Check workflow logs for specific error messages + +### Dependencies not syncing +- Ensure submodules are properly initialized +- Check `.github/scripts/sync_submodule_dependencies.py` logs +- Verify that submodule `pyproject.toml` files exist and are valid + +### uv lock fails +- Ensure `uv` version in workflow matches project requirements +- Check for dependency conflicts in the update +- Review the post-update script logs + +### PRs not triggering CI +- Verify PR has the `CI:L2` label +- Check `cicd-main.yml` configuration +- Ensure PR is targeting the `main` branch + +## Customization + +To modify Renovate behavior: +1. Edit `.github/renovate.json` for scheduling, grouping, or update rules +2. Update `.github/workflows/renovate.yml` for workflow settings +3. Modify `.github/scripts/renovate_post_update.sh` for custom post-update logic + +## Testing Changes + +Before committing Renovate config changes: +1. Use the workflow's dry-run mode to test +2. Check the Renovate logs for validation errors +3. 
Test the post-update script locally: + ```bash + .github/scripts/renovate_post_update.sh + ``` + +## References + +- [Renovate Documentation](https://docs.renovatebot.com/) +- [Renovate Configuration Options](https://docs.renovatebot.com/configuration-options/) +- [GitHub Action for Renovate](https://github.com/renovatebot/github-action) + diff --git a/.github/renovate.json b/.github/renovate.json new file mode 100644 index 0000000000..5dc894617d --- /dev/null +++ b/.github/renovate.json @@ -0,0 +1,119 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "config:best-practices", + ":dependencyDashboard" + ], + "timezone": "America/Los_Angeles", + "cloneSubmodules": true, + "customEnvVariables": { + "NRL_AUTO_SYNC_DEPS": "1" + }, + "prConcurrentLimit": 1, + "prHourlyLimit": 0, + "semanticCommits": "disabled", + "commitMessagePrefix": "[Renovate]", + "branchPrefix": "renovate/", + "labels": [ + "dependencies", + "CI:L2" + ], + "assignees": [], + "reviewers": [], + "postUpgradeTasks": { + "commands": [ + ".github/scripts/renovate_post_update.sh" + ], + "fileFilters": [ + "uv.lock", + "3rdparty/**/setup.py" + ], + "executionMode": "update" + }, + "packageRules": [ + { + "description": "Disable Python dependency updates unless explicitly allowlisted", + "matchManagers": [ + "pep621" + ], + "enabled": false + }, + { + "description": "Allowlisted training stack dependencies (from initial requirements)", + "matchManagers": [ + "pep621" + ], + "matchPackageNames": [ + "vllm", + "transformer-engine", + "flash-attn" + ], + "enabled": true + }, + { + "description": "Disable Python version updates (pyenv, github-actions python)", + "matchManagers": [ + "pyenv" + ], + "enabled": false + }, + { + "description": "Disable Python runtime version updates in GitHub Actions", + "matchPackageNames": [ + "python" + ], + "matchManagers": [ + "github-actions" + ], + "enabled": false + }, + { + "description": "Disable alternate Python managers that 
duplicate pep621 coverage", + "matchManagers": [ + "pip_setup", + "poetry", + "pixi", + "uv" + ], + "enabled": false + }, + { + "description": "Disable Docker base image updates by default (enable manually when needed)", + "matchManagers": [ + "dockerfile" + ], + "enabled": false + }, + { + "description": "Group every allowed update into a single PR", + "matchPackagePatterns": [ + "*" + ], + "groupName": "dependency updates", + "additionalBranchPrefix": "deps-", + "separateMajorMinor": false, + "separateMultipleMajor": false + } + ], + "git-submodules": { + "enabled": true + }, + "pep621": { + "enabled": true, + "fileMatch": ["(^|/)pyproject\\.toml$"], + "updateLockFiles": false + }, + "lockFileMaintenance": { + "enabled": false + }, + "vulnerabilityAlerts": { + "enabled": false + }, + "osvVulnerabilityAlerts": false, + "rangeStrategy": "bump", + "platformAutomerge": false, + "rebaseWhen": "behind-base-branch", + "recreateWhen": "always", + "forkProcessing": "enabled", + "includeForks": true +} diff --git a/.github/scripts/renovate_cmd.sh b/.github/scripts/renovate_cmd.sh new file mode 100644 index 0000000000..d4eff6f1a0 --- /dev/null +++ b/.github/scripts/renovate_cmd.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# Custom Renovate command that configures git safe.directory before running renovate +# This is needed because the pre-cloned repo is owned by a different user than the container user + +# Mark all directories as safe (required for pre-cloned repos with different ownership) +git config --global --add safe.directory '*' + +# Enable auto-sync of CACHED_DEPENDENCIES in 3rdparty setup.py files +# This allows submodule updates to proceed - the setup.py will use submodule deps directly +export NRL_AUTO_SYNC_DEPS=1 + +# Run the actual renovate command +exec renovate "$@" + diff --git a/.github/scripts/renovate_post_update.sh b/.github/scripts/renovate_post_update.sh new file mode 100755 index 0000000000..b9882fc3e7 --- /dev/null +++ b/.github/scripts/renovate_post_update.sh 
@@ -0,0 +1,77 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(realpath "$SCRIPT_DIR/../..")" + +echo "===================================" +echo "Renovate Post-Update Script" +echo "===================================" +echo "" + +cd "$REPO_ROOT" + +# Step 1: Sync submodule dependencies to setup.py files +echo "Step 1: Syncing submodule dependencies..." +python3 "$SCRIPT_DIR/sync_submodule_dependencies.py" +if [ $? -ne 0 ]; then + echo "ERROR: Failed to sync submodule dependencies" + exit 1 +fi +echo "" + +# Step 2: Run uv lock to regenerate lock file +echo "Step 2: Running uv lock..." + +# Install uv if not available (needed when running inside Renovate's Docker container) +if ! command -v uv &> /dev/null; then + echo "uv not found, installing..." + curl -LsSf https://astral.sh/uv/install.sh | sh + export PATH="$HOME/.local/bin:$PATH" +fi + +if ! command -v uv &> /dev/null; then + echo "ERROR: uv is not installed or not in PATH after installation attempt" + exit 1 +fi + +# Run uv lock to regenerate the lock file +# Note: Workspace members are defined in pyproject.toml [tool.uv.workspace].members +# Some members point to submodule paths (e.g., Automodel-workspace/Automodel), not the parent dir +uv lock +if [ $? 
-ne 0 ]; then + echo "ERROR: uv lock failed" + exit 1 +fi +echo "" + +# Step 3: Stage all changes for commit +echo "Step 3: Staging changes..." +git add -A 3rdparty/*/setup.py uv.lock +if [ $? -ne 0 ]; then + echo "WARNING: Failed to stage files with git add" + # Don't exit, as Renovate might handle git operations differently +fi +echo "" + +echo "===================================" +echo "Post-update completed successfully" +echo "===================================" +echo "" +echo "Changed files:" +git diff --cached --name-only || git status --short || echo "(git status unavailable)" + diff --git a/.github/scripts/sync_submodule_dependencies.py b/.github/scripts/sync_submodule_dependencies.py new file mode 100755 index 0000000000..3902c7f0c1 --- /dev/null +++ b/.github/scripts/sync_submodule_dependencies.py @@ -0,0 +1,225 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Sync CACHED_DEPENDENCIES in 3rdparty/*/setup.py files from their corresponding submodule pyproject.toml files. + +This script reads dependencies from submodule pyproject.toml files and updates the CACHED_DEPENDENCIES +list in the wrapper setup.py files to keep them in sync. 
+""" + +import re +import sys +import tomllib +from pathlib import Path +from typing import List, Tuple + + +def get_repo_root() -> Path: + """Get the repository root directory.""" + script_path = Path(__file__).resolve() + # Script is in .github/scripts/, so go up 2 levels + return script_path.parent.parent.parent + + +def read_dependencies_from_pyproject(pyproject_path: Path) -> List[str]: + """Read dependencies from a pyproject.toml file.""" + if not pyproject_path.exists(): + raise FileNotFoundError(f"pyproject.toml not found at {pyproject_path}") + + with open(pyproject_path, "rb") as f: + data = tomllib.load(f) + + if "project" not in data or "dependencies" not in data["project"]: + raise ValueError(f"No [project].dependencies found in {pyproject_path}") + + return [str(dep).strip() for dep in data["project"]["dependencies"]] + + +def find_list_end(content: str, start_pos: int) -> int: + """ + Find the position of the closing bracket for a Python list. + + Args: + content: The file content + start_pos: Position right after the opening '[' + + Returns: + Position of the matching closing ']' + """ + bracket_count = 1 + i = start_pos + in_string = False + string_char = None + + while i < len(content) and bracket_count > 0: + char = content[i] + + # Handle string literals (skip brackets inside strings) + if not in_string: + if char in '"\'': + in_string = True + string_char = char + elif char == '[': + bracket_count += 1 + elif char == ']': + bracket_count -= 1 + else: + # Check for escape sequences + if char == '\\' and i + 1 < len(content): + i += 1 # Skip the escaped character + elif char == string_char: + in_string = False + string_char = None + + i += 1 + + if bracket_count != 0: + raise ValueError("Unmatched brackets in file") + + return i - 1 # Position of the closing ']' + + +def update_cached_dependencies(setup_py_path: Path, new_dependencies: List[str]) -> bool: + """ + Update CACHED_DEPENDENCIES list in a setup.py file. 
+ + Returns True if changes were made, False otherwise. + """ + if not setup_py_path.exists(): + raise FileNotFoundError(f"setup.py not found at {setup_py_path}") + + content = setup_py_path.read_text() + + # Find the start of CACHED_DEPENDENCIES list + pattern = r'CACHED_DEPENDENCIES\s*=\s*\[' + match = re.search(pattern, content) + + if not match: + raise ValueError(f"CACHED_DEPENDENCIES not found in {setup_py_path}") + + # Find the actual end of the list using bracket counting + # This handles nested brackets like those in "megatron-core[dev,mlm]" + list_start = match.end() # Position right after the '[' + list_end = find_list_end(content, list_start) # Position of the closing ']' + + # Build new dependencies list with proper formatting + indent = " " + formatted_deps = [] + for dep in new_dependencies: + formatted_deps.append(f'{indent}"{dep}",') + + new_deps_str = "\n" + "\n".join(formatted_deps) + "\n" + + # Replace the content between brackets (keeping the brackets themselves) + new_content = content[:list_start] + new_deps_str + content[list_end:] + + # Check if content changed + if new_content == content: + return False + + setup_py_path.write_text(new_content) + return True + + +def sync_megatron_bridge() -> Tuple[bool, str]: + """Sync Megatron-Bridge dependencies.""" + repo_root = get_repo_root() + pyproject_path = repo_root / "3rdparty" / "Megatron-Bridge-workspace" / "Megatron-Bridge" / "pyproject.toml" + setup_py_path = repo_root / "3rdparty" / "Megatron-Bridge-workspace" / "setup.py" + + try: + dependencies = read_dependencies_from_pyproject(pyproject_path) + changed = update_cached_dependencies(setup_py_path, dependencies) + status = "updated" if changed else "unchanged" + return True, f"Megatron-Bridge: {status}" + except Exception as e: + return False, f"Megatron-Bridge: ERROR - {e}" + + +def sync_penguin() -> Tuple[bool, str]: + """Sync Penguin dependencies.""" + repo_root = get_repo_root() + + # Penguin submodule is at 
Penguin-workspace/Penguin/pyproject.toml + # but the directory structure shows it doesn't exist yet, so we check for it + penguin_pyproject = repo_root / "3rdparty" / "Penguin-workspace" / "Penguin" / "pyproject.toml" + setup_py_path = repo_root / "3rdparty" / "Penguin-workspace" / "setup.py" + + # Check if Penguin submodule exists + if not penguin_pyproject.exists(): + return True, "Penguin: skipped (submodule not initialized)" + + try: + dependencies = read_dependencies_from_pyproject(penguin_pyproject) + changed = update_cached_dependencies(setup_py_path, dependencies) + status = "updated" if changed else "unchanged" + return True, f"Penguin: {status}" + except Exception as e: + return False, f"Penguin: ERROR - {e}" + + +def sync_megatron_lm() -> Tuple[bool, str]: + """ + Sync Megatron-LM dependencies. + + Note: Megatron-LM has hardcoded requirements in setup.py, but we should verify + they match the submodule's requirements files. + """ + repo_root = get_repo_root() + + # Megatron-LM doesn't have a simple pyproject.toml with dependencies in the same format + # It has requirements files in various places. For now, we'll just report status. + megatron_lm_dir = repo_root / "3rdparty" / "Megatron-LM-workspace" / "Megatron-LM" + + if not megatron_lm_dir.exists(): + return True, "Megatron-LM: skipped (submodule not initialized)" + + # The setup.py for Megatron-LM has hardcoded dependencies that are manually curated + # from requirements files. We'll just report it as a manual check. 
+ return True, "Megatron-LM: manual check required (uses hardcoded requirements)" + + +def main(): + """Main function to sync all submodule dependencies.""" + print("Syncing submodule dependencies to 3rdparty setup.py files...") + print() + + results = [] + all_success = True + + # Sync each submodule + for sync_func in [sync_megatron_bridge, sync_penguin, sync_megatron_lm]: + success, message = sync_func() + results.append(message) + if not success: + all_success = False + + # Print results + for result in results: + print(f" {result}") + + print() + if all_success: + print("✓ All submodule dependencies synced successfully") + return 0 + else: + print("✗ Some submodule dependencies failed to sync") + return 1 + + +if __name__ == "__main__": + sys.exit(main()) + diff --git a/.github/workflows/_run_test.yml b/.github/workflows/_run_test.yml deleted file mode 100644 index 4670905052..0000000000 --- a/.github/workflows/_run_test.yml +++ /dev/null @@ -1,160 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-name: ~test template - -on: - workflow_call: - inputs: - RUNNER: - type: string - description: Runner to use for test - required: true - TIMEOUT: - type: number - description: Max runtime of test in minutes - required: false - default: 10 - UNIT_TEST_SCRIPT: - type: string - description: Unit test script to execute - required: true - DOC_TEST_SCRIPT: - type: string - description: Doc test script to execute - required: true - FUNCTIONAL_TEST_SCRIPT: - type: string - description: Functional test script to execute - required: true - AFTER_SCRIPT: - type: string - description: Script to run after main test in container - required: false - default: ":" - FINAL_SCRIPT_EXTERNAL: - type: string - description: Script to run after SCRIPT and AFTER_SCRIPT, but outside container (useful for logging) - required: false - default: ":" - IS_OPTIONAL: - type: boolean - description: Failure will cancel all other tests if set to true - required: false - default: false - secrets: - HF_TOKEN: - required: true -jobs: - main: - runs-on: ${{ inputs.RUNNER }} - timeout-minutes: ${{ inputs.TIMEOUT }} - env: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - steps: - - name: Docker system cleanup - run: | - docker system prune -a --filter "until=48h" --force || true - - - name: Docker pull image - run: | - docker pull nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }} - - - name: Checkout repository - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - - name: Setup docker run command - run: | - GITHUB_ACTION_DIR=$(dirname $GITHUB_STEP_SUMMARY) - DOCKER_RUN_CMD="docker run --rm -u root --runtime=nvidia --gpus all \ - --shm-size=64g \ - --env TRANSFORMERS_OFFLINE=0 \ - --env HYDRA_FULL_ERROR=1 \ - --env HF_HOME=/home/TestData/nemo-rl/hf_home \ - --env HF_DATASETS_CACHE=/home/TestData/nemo-rl/hf_datasets_cache \ - --env NEMO_RL_REPO_DIR=/opt/nemo-rl \ - --env HF_TOKEN \ - --volume $GITHUB_WORKSPACE:/opt/nemo-rl \ - --volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \ - --volume 
/mnt/datadrive/TestData/nemo-rl/datasets:/opt/nemo-rl/datasets:ro \ - --volume /mnt/datadrive/TestData/nemo-rl/checkpoints:/home/TestData/nemo-rl/checkpoints:ro \ - --volume /mnt/datadrive/TestData/nemo-rl/hf_home/hub:/home/TestData/nemo-rl/hf_home/hub \ - --volume /mnt/datadrive/TestData/nemo-rl/hf_datasets_cache:/home/TestData/nemo-rl/hf_datasets_cache \ - nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }}" - echo "DOCKER_RUN_CMD=$DOCKER_RUN_CMD" >> $GITHUB_ENV - - - name: Run unit tests - run: | - $DOCKER_RUN_CMD bash -eux -o pipefail -c " - git config --global --add safe.directory /opt/nemo-rl - # This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary - umask 000 - ${{ inputs.UNIT_TEST_SCRIPT }}" - - - name: Run doc tests - run: | - $DOCKER_RUN_CMD bash -eux -o pipefail -c " - git config --global --add safe.directory /opt/nemo-rl - # This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary - umask 000 - ${{ inputs.DOC_TEST_SCRIPT }}" - - - name: Run functional tests - run: | - $DOCKER_RUN_CMD bash -eux -o pipefail -c " - git config --global --add safe.directory /opt/nemo-rl - # This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary - umask 000 - ${{ inputs.FUNCTIONAL_TEST_SCRIPT }}" - - - name: Upload coverage report - uses: actions/upload-artifact@v4 - with: - name: unit-test-coverage-report - path: ${{ github.workspace }}/tests/coverage.json - retention-days: 7 - - - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main" - if: failure() - - - name: after_script - if: always() && inputs.AFTER_SCRIPT != ':' - run: | - # Run the after script - cmd=$(cat <<"RUN_TEST_EOF" - ${{ inputs.AFTER_SCRIPT }} - RUN_TEST_EOF - ) - GITHUB_ACTION_DIR=$(dirname $GITHUB_STEP_SUMMARY) - docker run --rm \ - --env GITHUB_STEP_SUMMARY \ - --volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \ - --volume 
$GITHUB_WORKSPACE:/opt/nemo-rl \ - nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }} bash -eux -o pipefail -c "$cmd" - - - name: final_script_external - if: always() && inputs.FINAL_SCRIPT_EXTERNAL != ':' - run: | - cmd=$(cat <<"RUN_TEST_EOF" - ${{ inputs.FINAL_SCRIPT_EXTERNAL }} - RUN_TEST_EOF - ) - bash -eux -o pipefail -c "$cmd" - - - name: Container shutdown - if: always() - run: | - # Ensure any added files in the mounted directory are owned by the runner user to allow it to clean up - docker run --rm nemoci.azurecr.io/nemo_rl_container:${{ github.run_id }} bash -c "find /opt/nemo-rl -path '/opt/nemo-rl/datasets' -prune -o -exec chown $(id -u):$(id -g) {} +" diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml deleted file mode 100644 index c26162f6e3..0000000000 --- a/.github/workflows/cicd-main.yml +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -name: "CICD NeMo RL" - -on: - pull_request: - branches: - - "main" - - "r**" - types: [labeled, opened, synchronize, reopened] - merge_group: - types: [checks_requested] - workflow_dispatch: - inputs: - test_to_run: - required: false - default: L2 - type: choice - options: - - docs - - L0 - - L1 - - L2 - description: Test level to run. docs = doc tests only, L0 = unit/docs/lint, L1 = L0 + functional, L2 = L1 + convergence - # TODO: Due to limited compute, disabling pushes to main. 
This is okay to do since we force PRs to be up to date and the CI tests on pull/$PR_NUM/merge - #push: - # branches: - # - 'main' - -concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }} - cancel-in-progress: true - -jobs: - pre-flight: - runs-on: ubuntu-latest - outputs: - test_level: ${{ steps.evaluate.outputs.test_level }} - steps: - - name: Get changed files - id: changed-files - if: github.event_name == 'pull_request' - uses: step-security/changed-files@v45.0.1 - with: - files_yaml: | - doc: - - '**.md' - - docs/** - src: - - '!**.md' - - '!docs/**' - - - name: Evaluate conditions - id: evaluate - env: - DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }} - CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }} - CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }} - IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }} - LABEL: ${{ github.event.label.name }} - MERGE_GROUP: ${{ github.event_name == 'merge_group' }} - run: | - # Some output that's helpful for debugging - echo "Docs changed: $CHANGED_DOCS" - echo "Src changed: $CHANGED_SRC" - echo "LABEL: $LABEL" - echo "IS_PULLREQUEST: $IS_PULLREQUEST" - echo "DOCS_ONLY: $DOCS_ONLY" - - # Run CI only (on main or if label is attached) and if it's not only docs - # Determine test level based on conditions - if [[ "$DOCS_ONLY" == "true" || "$LABEL" == "CI:docs" ]]; then - # For doc-only changes, run only doc tests - TEST_LEVEL="docs" - elif [[ "$LABEL" == "CI:L0" ]]; then - TEST_LEVEL="L0" - elif [[ "$LABEL" == "CI:L1" || "$IS_PULLREQUEST" == "false" || "$MERGE_GROUP" == "true" ]]; then - # For labeled PRs, pushes to main (IS_PULL_REQUEST=false), or merge group events, run L1 by default - TEST_LEVEL="L1" - elif [[ "$LABEL" == "CI:L2" ]]; then - TEST_LEVEL="L2" - else - # Skip tests by default for non-labeled PRs - 
TEST_LEVEL="none" - fi - - # Override test level if specified in workflow_dispatch - if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then - echo "Overriding test level from $TEST_LEVEL to ${{ inputs.test_to_run }}" - TEST_LEVEL="${{ inputs.test_to_run }}" - fi - - echo "test_level=$TEST_LEVEL" | tee -a "$GITHUB_OUTPUT" - - submodule-check: - name: Check submodule fast-forward - needs: [pre-flight] - if: github.event_name == 'pull_request' - uses: ./.github/workflows/_submodule_check.yml - with: - base_ref: ${{ github.base_ref }} - head_ref: ${{ github.head_ref }} - pr_number: ${{ github.event.number }} - head_sha: ${{ github.event.pull_request.head.sha }} - - lint-check: - name: Lint check - needs: [pre-flight] - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - name: Install uv - uses: astral-sh/setup-uv@v5 - with: - version: "0.7.2" - enable-cache: true - prune-cache: false - # Faster than uv python install since it caches python alongside runner - - name: "Set up Python" - uses: actions/setup-python@v5 - with: - python-version-file: ".python-version" - - name: Check lint - run: | - uv venv - uv run --group dev pre-commit install - uv run --group dev pre-commit run --all-files --show-diff-on-failure --color=always - # TODO: this is a temporary check and should be removed once we have 100% correctness - - name: Check if any files with zero errors not in whitelist - run: | - missing_count=0 - for file in $(uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . 
as $file | if ($error_counts[$file] // 0) == 0 then $file else empty end'); do - if ! fgrep -q "$file" pyrefly.toml; then - echo "File $file has zero errors but is not in pyrefly.toml in the 'project-includes' list. Please add it to this whitelist." - ((missing_count++)) - fi - done - - exit $missing_count - - name: Minimize uv cache - run: uv cache prune --ci - - sphinx-build: - if: ${{ needs.pre-flight.outputs.test_level != 'none' }} - name: Sphinx build - needs: [pre-flight] - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - name: Install uv - uses: astral-sh/setup-uv@v5 - with: - version: "0.7.2" - enable-cache: true - prune-cache: false - # Faster than uv python install since it caches python alongside runner - - name: "Set up Python" - uses: actions/setup-python@v5 - with: - python-version-file: ".python-version" - - name: build docs - run: | - uv venv - cd docs/ - uv run --group docs sphinx-build --fail-on-warning --builder html . 
_build/html - - name: Minimize uv cache - run: uv cache prune --ci - - build-container: - if: ${{ needs.pre-flight.outputs.test_level != 'none' }} - needs: [pre-flight] - uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_container.yml@v0.30.0 - with: - build-ref: ${{ github.sha }} - image-name: nemo_rl_container - dockerfile: docker/Dockerfile - image-label: nemo-rl - target: hermetic - build-args: | - MAX_JOBS=32 - NEMO_RL_COMMIT=${{ github.sha }} - - tests: - name: Tests - needs: [build-container, pre-flight] - uses: ./.github/workflows/_run_test.yml - if: ${{ needs.pre-flight.outputs.test_level != 'none' }} - with: - RUNNER: self-hosted-azure - TIMEOUT: 180 - UNIT_TEST_SCRIPT: | - cd /opt/nemo-rl - if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then - uv run --no-sync bash -x ./tests/run_unit.sh --cov=nemo_rl --hf-gated - uv run --extra mcore bash -x ./tests/run_unit.sh --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only - else - echo Skipping unit tests for docs-only level - fi - DOC_TEST_SCRIPT: | - cd /opt/nemo-rl/docs - if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(docs|L0|L1|L2)$ ]]; then - uv run --no-sync sphinx-build -b doctest . 
_build/doctest - else - echo Skipping doc tests for level ${{ needs.pre-flight.outputs.test_level }} - fi - FUNCTIONAL_TEST_SCRIPT: | - cd /opt/nemo-rl - if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L1|L2)$ ]]; then - time uv run --no-sync bash ./tests/functional/sft.sh - time uv run --no-sync bash ./tests/functional/grpo.sh - time uv run --no-sync bash ./tests/functional/grpo_megatron.sh - time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh - time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh - time uv run --no-sync bash ./tests/functional/dpo.sh - time uv run --no-sync bash ./tests/functional/eval.sh - time uv run --no-sync bash ./tests/functional/eval_async.sh - time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh - else - echo Skipping functional tests for level ${{ needs.pre-flight.outputs.test_level }} - fi - # TODO: enable once we have convergence tests in CI - #CONVERGENCE_TEST_SCRIPT: | - # cd /opt/nemo-rl - # if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L2)$ ]]; then - # echo "Running convergence tests" - # # Add your convergence test commands here - # # uv run --no-sync bash ./tests/convergence/test.sh - # else - # echo "Skipping convergence tests for level ${{ needs.pre-flight.outputs.test_level }}" - # fi - AFTER_SCRIPT: | - cd /opt/nemo-rl - cat <<EOF | tee -a $GITHUB_STEP_SUMMARY - # Test Summary for level: ${{ needs.pre-flight.outputs.test_level }} - - ## Unit test results - \`\`\`json - $(if [[ "${{ needs.pre-flight.outputs.test_level }}" =~ ^(L0|L1|L2)$ ]]; then cat tests/unit/unit_results.json || echo "n/a"; else echo "Not run"; fi) - \`\`\` - - ## Test Level: ${{ needs.pre-flight.outputs.test_level }} - EOF - secrets: - HF_TOKEN: ${{ secrets.HF_TOKEN }} - - coverage: - name: Upload coverage to Codecov - needs: [tests] - if: ${{ contains('L0 L1 L2', needs.pre-flight.outputs.test_level) }} - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: 
actions/checkout@v4 - - - name: Download coverage artifact - uses: actions/download-artifact@v4 - with: - name: unit-test-coverage-report - path: . - - - name: Upload to Codecov - uses: codecov/codecov-action@v4 - with: - token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.json - flags: unit-tests - verbose: true - override_branch: ${{ github.event_name == 'merge_group' && 'main' || '' }} - - CI_QA_Gate: - name: CI quality check - if: always() - runs-on: ubuntu-latest - needs: - - pre-flight - - lint-check - - sphinx-build - - tests - steps: - - name: main - env: - JOB_RESULTS: ${{ toJSON(needs) }} - # Job is considered successful if nothing was run, or if all jobs were successful (the tests run even if only docs were run b/c doctests are selected) - ALL_SUCCESS: >- - ${{ - needs.lint-check.result == 'success' && - ( - needs.pre-flight.outputs.test_level == 'none' || - ( - needs.pre-flight.outputs.test_level != 'none' && - needs.sphinx-build.result == 'success' && - needs.tests.result == 'success' - ) - ) - }} - CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }} - TEST_LEVEL: ${{ needs.pre-flight.outputs.test_level }} - run: | - SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"') - echo '🤖: CICD Result for test level: ${{ needs.pre-flight.outputs.test_level }}' >> $GITHUB_STEP_SUMMARY - echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY - - if [[ "$TEST_LEVEL" == "none" ]]; then - echo "No tests were run, passing gate" >> $GITHUB_STEP_SUMMARY - exit 0 - fi - - test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true" - - DCO_merge_group: - name: DCO - if: github.event_name == 'merge_group' - runs-on: ubuntu-latest - steps: - - run: echo "The real DCO check happens on PRs only. This is a placeholder for the merge queue to keep the DCO check as a required status check." 
diff --git a/.github/workflows/renovate.yml b/.github/workflows/renovate.yml new file mode 100644 index 0000000000..6d51c0a94c --- /dev/null +++ b/.github/workflows/renovate.yml @@ -0,0 +1,160 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Renovate + +on: + schedule: + # Run daily at 5 PM UTC (9 AM PST / 10 AM PDT) to align with Renovate schedule + - cron: '0 17 * * *' + workflow_dispatch: + inputs: + log_level: + description: 'Renovate log level' + required: false + default: 'info' + type: choice + options: + - debug + - info + - warn + - error + update_docker: + description: 'Include Docker base image updates (normally disabled)' + required: false + default: false + type: boolean + force_run: + description: 'Force run ignoring schedule restrictions (always create PRs)' + required: false + default: true + type: boolean + +jobs: + renovate: + name: Renovate + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + submodules: 'recursive' + + - name: Set up Python + uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6 + with: + python-version: '3.14.3' + + - name: Install uv + uses: astral-sh/setup-uv@37802adc94f370d6bfd71619e3f0bf239e1f3b78 # v7 + with: + version: "0.11.1" + enable-cache: true + prune-cache: false + + - name: Make scripts executable + run: | + chmod +x 
.github/scripts/sync_submodule_dependencies.py + chmod +x .github/scripts/renovate_post_update.sh + chmod +x .github/scripts/renovate_cmd.sh + + - name: Configure Renovate options + id: config + run: | + # Enable Docker updates if requested (modify config temporarily) + if [ "${{ inputs.update_docker }}" = "true" ]; then + echo "Enabling Docker base image updates..." + jq '(.packageRules[] | select(.matchManagers == ["dockerfile"])).enabled = true' \ + .github/renovate.json > .github/renovate.json.tmp && \ + mv .github/renovate.json.tmp .github/renovate.json + fi + + # Build RENOVATE_FORCE JSON if force_run is enabled + if [ "${{ inputs.force_run }}" = "true" ]; then + echo "Force run enabled - ignoring schedule restrictions" + echo 'force_json={"schedule":[]}' >> $GITHUB_OUTPUT + else + echo 'force_json=' >> $GITHUB_OUTPUT + fi + + - name: Pre-clone repository with full submodule history + env: + RENOVATE_TOKEN: ${{ secrets.RENOVATE_TOKEN }} + run: | + # Pre-clone the repo to where Renovate expects it, with unshallowed submodules + # Renovate will reuse this clone instead of cloning fresh + CLONE_DIR="/tmp/renovate/repos/github/${{ github.repository_owner }}/${{ github.event.repository.name }}" + mkdir -p "$(dirname "$CLONE_DIR")" + + echo "Cloning to $CLONE_DIR with full submodule history..." + git clone --recurse-submodules \ + "https://x-access-token:${RENOVATE_TOKEN}@github.com/${{ github.repository }}.git" \ + "$CLONE_DIR" + + cd "$CLONE_DIR" + + # Unshallow all submodules (they were cloned shallow due to .gitmodules setting) + echo "Unshallowing submodules..." 
+ git submodule foreach --recursive 'git fetch --unshallow origin 2>/dev/null || echo "Already unshallow or no remote"' + + echo "" + echo "Submodule status:" + git submodule status --recursive + + # Copy the renovate command wrapper to /tmp so it's accessible in the container + # This wrapper sets git safe.directory before running renovate + # Use GITHUB_WORKSPACE since we've cd'd into the pre-cloned directory + cp "${GITHUB_WORKSPACE}/.github/scripts/renovate_cmd.sh" /tmp/renovate_cmd.sh + chmod +x /tmp/renovate_cmd.sh + + # Fix permissions for Renovate container (runs as uid 1000) + # Make everything world-writable so the container user can access it + chmod -R 777 /tmp/renovate + + # Option 1: Use GitHub App token (recommended for better rate limits) + # Requires RENOVATE_APP_ID and RENOVATE_APP_PRIVATE_KEY secrets + # If these secrets aren't set, this step will be skipped and PAT will be used + - name: Get GitHub App token + id: get-app-token + continue-on-error: true + uses: actions/create-github-app-token@f8d387b68d61c58ab83c6c016672934102569859 # v3 + with: + app-id: ${{ secrets.RENOVATE_APP_ID }} + private-key: ${{ secrets.RENOVATE_APP_PRIVATE_KEY }} + + - name: Run Renovate + uses: renovatebot/github-action@68a3ea99af6ad249940b5a9fdf44fc6d7f14378b # v46.1.6 + with: + configurationFile: .github/renovate.json + # Use GitHub App token if available, otherwise fall back to PAT + token: ${{ steps.get-app-token.outputs.token || secrets.RENOVATE_TOKEN }} + # Use custom command that sets git safe.directory before running renovate + # This avoids "dubious ownership" errors with the pre-cloned repo + docker-cmd-file: /tmp/renovate_cmd.sh + # Pass NRL_AUTO_SYNC_DEPS to container (extends default env-regex) + env-regex: '^(?:RENOVATE_\w+|LOG_LEVEL|GITHUB_COM_TOKEN|NODE_OPTIONS|(?:HTTPS?|NO)_PROXY|(?:https?|no)_proxy|NRL_AUTO_SYNC_DEPS)$' + env: + LOG_LEVEL: ${{ inputs.log_level || 'info' }} + # Explicitly set which repository to process + RENOVATE_REPOSITORIES: ${{ 
github.repository }} + # Global-only configuration options + RENOVATE_ONBOARDING: 'false' + RENOVATE_REQUIRE_CONFIG: 'optional' + RENOVATE_ALLOWED_POST_UPGRADE_COMMANDS: '["^\\.github/scripts/renovate_post_update\\.sh$"]' + # Enable auto-sync of CACHED_DEPENDENCIES in 3rdparty setup.py files + # This allows submodule updates to proceed without manual intervention + NRL_AUTO_SYNC_DEPS: '1' + # Force run ignoring schedule (when triggered manually with force_run=true) + RENOVATE_FORCE: ${{ steps.config.outputs.force_json }} + diff --git a/.gitignore b/.gitignore index 954db7041d..5d5611d1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,6 @@ # Adding to .gitignore helps reduce the size of your working_dir -.git +/.git *.out *.log *.tar @@ -17,26 +17,31 @@ dist/ *.vscode/ release_run* ckpts/ -3rdparty/vllm # Test coverage.json .coverage* +unit_results.json +unit_results/ test_assets/ +.nrl_remote_map.json +.nrl_remote_state.json # Cache uv_cache/ hf_home/ hf_datasets_cache/ *logs/ -datasets/ +/datasets/ docker/* !docker/Dockerfile +!docker/Dockerfile.ngc_pytorch !docker/README.md wandb/ checkpoints/ results/ -code_snapshots/ +code_snapshots*/ +.cache/ # Runtime env *runtime_env.yaml diff --git a/.gitmodules b/.gitmodules index 2a588f3a89..3586f684df 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,10 +1,15 @@ -[submodule "3rdparty/NeMo"] - path = 3rdparty/NeMo-workspace/NeMo - url = https://github.com/NVIDIA/NeMo.git - branch = ashors/nemorl-qwen3 - shallow = true [submodule "3rdparty/Megatron-LM"] path = 3rdparty/Megatron-LM-workspace/Megatron-LM url = https://github.com/terrykong/Megatron-LM.git - branch = sahilj/megatron-external-loss-norm + branch = yuya/nemo-rl-use-dev + shallow = true +[submodule "3rdparty/Megatron-Bridge"] + path = 3rdparty/Megatron-Bridge-workspace/Megatron-Bridge + url = https://github.com/NVIDIA-NeMo/Megatron-Bridge.git + branch = main + shallow = true +[submodule "3rdparty/Automodel-workspace/Automodel"] + path = 
3rdparty/Automodel-workspace/Automodel + url = https://github.com/NVIDIA-NeMo/Automodel.git + branch = main shallow = true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d22fdd475a..de1e14c5f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,11 +3,11 @@ repos: rev: v4.4.0 hooks: - id: end-of-file-fixer - # only include python files - files: \.py$ + types_or: [python, pyi] # Only include Python files. - id: trailing-whitespace - # only include python files - files: \.py$ + types_or: [python, pyi] # Only include Python files. + - id: pretty-format-json + args: ["--autofix", "--no-sort-keys", "--indent", "2"] - repo: https://github.com/astral-sh/ruff-pre-commit rev: "v0.9.9" # Use the appropriate version @@ -18,6 +18,12 @@ repos: args: ["check", "--select", "I", "--fix"] - id: ruff-format + - repo: https://github.com/ComPWA/mirrors-taplo + rev: v0.9.3 + hooks: + - id: taplo-format + name: Format TOML files with taplo + - repo: local hooks: - id: no-underscore-md @@ -36,8 +42,51 @@ repos: exclude: '^\.github/' types: [file] - - repo: https://github.com/facebook/pyrefly - rev: 0.24.2 + - repo: local hooks: - id: pyrefly-typecheck - files: \.py$ \ No newline at end of file + name: pyrefly check + entry: uv run --group dev pyrefly check + types_or: [python, pyi] + language: system + pass_filenames: false # Pyrefly reads config & project roots itself. + args: [] + require_serial: true + additional_dependencies: [] + minimum_pre_commit_version: "2.9.2" + + # This pre-commit hook ensures that the config file is minimized and reflects exactly what you + # intend to merge. Without it, you might run experiments with one config, but when merging upstream, + # the config could silently fall back to the base defaults—resulting in different hyperparameters. + # + # For example, we’ve seen cases where an SFT recipe runs without a custom chat_template. 
When merged, + # it unexpectedly picks up the default recommended chat_template from upstream, which doesn’t match + # the original experiment setup. + # + # If this check is disruptive, you can disable the pre-commit hook locally. However, before a recipe + # is accepted upstream, we expect the config to be minimized. + - repo: local + hooks: + - id: configs-minimize-check-llm + name: minimize-check llm recipes + language: system + pass_filenames: false + entry: bash + args: + - -lc + - | + set -euo pipefail + base="examples/configs/dpo.yaml"; for f in examples/configs/recipes/llm/dpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done + base="examples/configs/grpo_math_1B.yaml"; for f in examples/configs/recipes/llm/grpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done + base="examples/configs/sft.yaml"; for f in examples/configs/recipes/llm/sft-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done + base="examples/configs/distillation_math.yaml"; for f in examples/configs/recipes/llm/distillation-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done + - id: configs-minimize-check-vlm + name: minimize-check vlm recipes + language: system + pass_filenames: false + entry: bash + args: + - -lc + - | + set -euo pipefail + base="examples/configs/vlm_grpo_3B.yaml"; for f in examples/configs/recipes/vlm/vlm_grpo-*.yaml; do [ -e "$f" ] && ./tools/config_cli.py minimize-check "$base" "$f"; done diff --git a/3rdparty/Automodel-workspace/Automodel b/3rdparty/Automodel-workspace/Automodel new file mode 160000 index 0000000000..f09ff4c0f2 --- /dev/null +++ b/3rdparty/Automodel-workspace/Automodel @@ -0,0 +1 @@ +Subproject commit f09ff4c0f2fd0d5dd0a562a2514340a2e4985747 diff --git a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge new file mode 160000 index 0000000000..4da8a6d936 --- /dev/null +++ 
b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge @@ -0,0 +1 @@ +Subproject commit 4da8a6d936edb7ae7c82755b192a50029d56f991 diff --git a/nemo_rl/converters/huggingface/vllm_export.py b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py similarity index 78% rename from nemo_rl/converters/huggingface/vllm_export.py rename to 3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py index 341a77c5bc..b2ae4cf651 100644 --- a/nemo_rl/converters/huggingface/vllm_export.py +++ b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py @@ -11,3 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +try: + from megatron.bridge import AutoBridge # noqa: F401 + + INSTALLED = True +except Exception: + INSTALLED = False + +print(f"Megatron Bridge {INSTALLED=}") diff --git a/3rdparty/Megatron-Bridge-workspace/pyproject.toml b/3rdparty/Megatron-Bridge-workspace/pyproject.toml new file mode 100644 index 0000000000..783532c660 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/pyproject.toml @@ -0,0 +1,10 @@ +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "megatron-bridge" +dynamic = ["dependencies", "version"] +authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] +description = "Standalone packaging for the Megatron Bridge sub-module." +requires-python = ">=3.10" diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py new file mode 100644 index 0000000000..2890846f6f --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/setup.py @@ -0,0 +1,136 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import sys +import tomllib + +import setuptools + +# Conditional packaging mirroring NeMo and Megatron-LM workspaces +final_packages = [] +final_package_dir = {} + +# If the submodule is present, expose `megatron.bridge` package from the checkout +bridge_src_dir = "Megatron-Bridge/src/megatron/bridge" +bridge_package_name = "megatron.bridge" + +CACHED_DEPENDENCIES = [ + "transformers>=5.0.0,<=5.3.0", + "peft>=0.18.1", + "datasets>=2.20.0", + "accelerate", + "diffusers>=0.36.0", + "peft>=0.18.0", + "einops", + "imageio", + "imageio-ffmpeg", + "omegaconf>=2.3.0", + "tensorboard>=2.19.0", + "typing-extensions", + "rich", + "wandb>=0.25.0", + "six>=1.17.0", + "regex>=2024.11.6", + "pyyaml>=6.0.2", + "tqdm>=4.67.1", + "hydra-core>1.3,<=1.3.2", + "megatron-core[dev,mlm]", + "qwen-vl-utils", + "transformer-engine[pytorch,core_cu13]", + "mamba-ssm", + "nvidia-resiliency-ext~=0.5.0", + "causal-conv1d", + "flash-linear-attention", + "timm", + "open-clip-torch>=3.2.0", + "mlflow>=3.5.0", + "comet-ml>=3.50.0", + "torch>=2.6.0", +] + +# If the bridge source exists, compare cached dependencies with the submodule's pyproject +if os.path.exists(bridge_src_dir): + pyproject_path = os.path.join("Megatron-Bridge", "pyproject.toml") + if not os.path.exists(pyproject_path): + raise FileNotFoundError( + f"[megatron-bridge][setup] {pyproject_path} not found; skipping dependency consistency check." 
+ ) + + with open(pyproject_path, "rb") as f: + data = tomllib.load(f) + project = data["project"] + deps_list = project["dependencies"] + submodule_deps = set(str(d).strip() for d in deps_list) + + missing_in_cached = submodule_deps - set(CACHED_DEPENDENCIES) + extra_in_cached = set(CACHED_DEPENDENCIES) - submodule_deps + + if missing_in_cached or extra_in_cached: + print( + "[megatron-bridge][setup] Dependency mismatch between Megatron-Bridge-workspace/Megatron-Bridge/pyproject.toml vs Megatron-Bridge-workspace/setup.py::CACHED_DEPENDENCIES.", + file=sys.stderr, + ) + if missing_in_cached: + print( + " - Present in Megatron-Bridge/pyproject.toml but missing from CACHED_DEPENDENCIES:", + file=sys.stderr, + ) + for dep in sorted(missing_in_cached): + print(f" * {dep}", file=sys.stderr) + if extra_in_cached: + print( + " - Present in CACHED_DEPENDENCIES but not in Megatron-Bridge/pyproject.toml:", + file=sys.stderr, + ) + for dep in sorted(extra_in_cached): + print(f" * {dep}", file=sys.stderr) + + # Check for auto-sync mode (used by Renovate to allow submodule updates) + # Set NRL_AUTO_SYNC_DEPS=1 to auto-sync instead of erroring + auto_sync = os.environ.get("NRL_AUTO_SYNC_DEPS", "").lower() in ("1", "true", "yes") + + if auto_sync: + CACHED_DEPENDENCIES.clear() + CACHED_DEPENDENCIES.extend(deps_list) + print( + "[megatron-bridge][setup] Auto-synced CACHED_DEPENDENCIES from submodule (NRL_AUTO_SYNC_DEPS=1)", + file=sys.stderr, + ) + else: + print( + " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + "[megatron-bridge][setup] Dependency sets are consistent with the submodule pyproject.", + file=sys.stderr, + ) + +if os.path.exists(bridge_src_dir): + final_packages.append(bridge_package_name) + final_package_dir[bridge_package_name] = bridge_src_dir + +setuptools.setup( + name="megatron-bridge", + version="0.0.0", + description="Standalone packaging for the Megatron Bridge 
sub-module.", + author="NVIDIA", + author_email="nemo-toolkit@nvidia.com", + packages=final_packages, + package_dir=final_package_dir, + py_modules=["is_megatron_bridge_installed"], + install_requires=CACHED_DEPENDENCIES, +) diff --git a/3rdparty/Megatron-LM-workspace/Megatron-LM b/3rdparty/Megatron-LM-workspace/Megatron-LM index 2ff0f099ff..9fc9377109 160000 --- a/3rdparty/Megatron-LM-workspace/Megatron-LM +++ b/3rdparty/Megatron-LM-workspace/Megatron-LM @@ -1 +1 @@ -Subproject commit 2ff0f099ffc30ffd152e3e29e921a1609d00855c +Subproject commit 9fc9377109abd18b2a97f897e0a3565ad40a1a66 diff --git a/3rdparty/Megatron-LM-workspace/pyproject.toml b/3rdparty/Megatron-LM-workspace/pyproject.toml index 77f09f838a..4537293a9d 100644 --- a/3rdparty/Megatron-LM-workspace/pyproject.toml +++ b/3rdparty/Megatron-LM-workspace/pyproject.toml @@ -1,10 +1,7 @@ # Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. [build-system] -requires = [ - "setuptools", - "pybind11", -] +requires = ["setuptools", "pybind11"] build-backend = "setuptools.build_meta" [project] @@ -12,4 +9,4 @@ name = "megatron-core" dynamic = ["dependencies", "version"] description = "Megatron Core - a library for efficient and scalable training of transformer based models" authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] \ No newline at end of file +maintainers = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] diff --git a/3rdparty/NeMo-workspace/NeMo b/3rdparty/NeMo-workspace/NeMo deleted file mode 160000 index 33259f2540..0000000000 --- a/3rdparty/NeMo-workspace/NeMo +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 33259f2540af6eef375d43fc48bdcbd7ec490c29 diff --git a/3rdparty/NeMo-workspace/setup.py b/3rdparty/NeMo-workspace/setup.py deleted file mode 100644 index 6bc940202c..0000000000 --- a/3rdparty/NeMo-workspace/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os - -import setuptools - -# --- Configuration Start --- -final_packages = [] -final_package_dir = {} - -# --- nemo package conditional section --- -nemo_package_source_dir = "NeMo/nemo" -nemo_package_name = "nemo" - -if os.path.exists(nemo_package_source_dir): - final_packages.append(nemo_package_name) - final_package_dir[nemo_package_name] = nemo_package_source_dir -# --- End of nemo package conditional section --- - -setuptools.setup( - name="nemo-tron", # Must match [project].name in pyproject.toml - version="0.0.0", # Must match [project].version in pyproject.toml - description="Standalone packaging for the NeMo Tron sub-module.", # Can be sourced from pyproject.toml too - author="NVIDIA", - author_email="nemo-toolkit@nvidia.com", - packages=final_packages, - package_dir=final_package_dir, - py_modules=["is_nemo_installed"], - install_requires=[ - "lightning", - "wget", - "onnx", - "fiddle", - "cloudpickle", - "braceexpand", - "webdataset", - "h5py", - "ijson", - "matplotlib", - "scikit-learn", - "nemo-run", - "hatchling", - ], -) diff --git a/nemo_rl/metrics/metrics_utils.py b/3rdparty/Penguin-workspace/is_penguin_installed.py similarity index 80% rename from nemo_rl/metrics/metrics_utils.py rename to 3rdparty/Penguin-workspace/is_penguin_installed.py index 341a77c5bc..56563cf447 100644 --- a/nemo_rl/metrics/metrics_utils.py +++ b/3rdparty/Penguin-workspace/is_penguin_installed.py @@ 
-11,3 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +try: + from penguin import config_types # noqa: F401 + + INSTALLED = True +except Exception: + INSTALLED = False + +print(f"PENGUIN {INSTALLED=}") diff --git a/3rdparty/NeMo-workspace/pyproject.toml b/3rdparty/Penguin-workspace/pyproject.toml similarity index 60% rename from 3rdparty/NeMo-workspace/pyproject.toml rename to 3rdparty/Penguin-workspace/pyproject.toml index 3eb6af1c86..62b135e012 100644 --- a/3rdparty/NeMo-workspace/pyproject.toml +++ b/3rdparty/Penguin-workspace/pyproject.toml @@ -1,10 +1,10 @@ [build-system] requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" [project] -name = "nemo-tron" +name = "penguin" dynamic = ["dependencies", "version"] authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -description = "Standalone packaging for the NeMo Tron sub-module." +description = "Standalone packaging for the Penguin sub-module." requires-python = ">=3.10" -# Dependencies will be managed in setup.py diff --git a/3rdparty/Penguin-workspace/setup.py b/3rdparty/Penguin-workspace/setup.py new file mode 100644 index 0000000000..cd61a73035 --- /dev/null +++ b/3rdparty/Penguin-workspace/setup.py @@ -0,0 +1,111 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import sys
+import tomllib
+from pathlib import Path
+
+import setuptools
+
+final_packages = []
+final_package_dir = {}
+
+# If the submodule is present, expose `penguin` package from the checkout
+src_dir = Path("Penguin")
+
+
+CACHED_DEPENDENCIES = [
+    "openai<=2.6.1",
+    "tqdm",
+    "pydantic",
+    "pydantic_core",
+    "devtools",
+    "fastapi",
+    "uvicorn",
+    "uvloop",
+    "hydra-core",
+    "omegaconf",
+    "gradio",
+    "mlflow",
+    "tdigest>=0.5.2.2",
+    "aiohttp",
+    "yappi",
+    "ray[default]",
+]
+
+if src_dir.exists():
+    pyproject_toml_path = src_dir / "pyproject.toml"
+    if not pyproject_toml_path.exists():
+        raise FileNotFoundError(
+            f"[Penguin][setup] {pyproject_toml_path} not found; cannot verify dependency consistency."
+        )
+    with pyproject_toml_path.open("rb") as f:
+        pyproject_toml = tomllib.load(f)
+
+    packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"]
+
+    for package in packages:
+        final_packages.append(package)
+        final_package_dir[package] = src_dir / package
+
+    actual_dependencies = pyproject_toml["project"]["dependencies"]
+
+    ########################################
+    # Compare cached dependencies with the submodule's pyproject
+    ########################################
+
+    missing_in_cached = set(actual_dependencies) - set(CACHED_DEPENDENCIES)
+    extra_in_cached = set(CACHED_DEPENDENCIES) - set(actual_dependencies)
+
+    if missing_in_cached or extra_in_cached:
+        print(
+            "[Penguin][setup] Dependency mismatch between Penguin-workspace/Penguin/pyproject.toml vs Penguin-workspace/setup.py::CACHED_DEPENDENCIES.",
+            file=sys.stderr,
+        )
+        if missing_in_cached:
+            print(
+                " - Present in Penguin-workspace/Penguin/pyproject.toml but missing from CACHED_DEPENDENCIES:",
+                file=sys.stderr,
+            )
+            for dep in sorted(missing_in_cached):
+                print(f" * {dep}", file=sys.stderr)
+        if extra_in_cached:
+            print(
+                " - Present in CACHED_DEPENDENCIES but not in Penguin-workspace/Penguin/pyproject.toml:",
+                file=sys.stderr,
+            )
+            for dep in 
sorted(extra_in_cached): + print(f" * {dep}", file=sys.stderr) + print( + " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + "[Penguin][setup] Dependency sets are consistent with the submodule pyproject.", + file=sys.stderr, + ) + + +setuptools.setup( + name="penguin", + version="0.0.0", + description="Standalone packaging for the Penguin sub-module.", + author="NVIDIA", + author_email="nemo-toolkit@nvidia.com", + packages=final_packages, + package_dir=final_package_dir, + py_modules=["is_penguin_installed"], + install_requires=CACHED_DEPENDENCIES, +) diff --git a/CODING_GUIDELINES.md b/CODING_GUIDELINES.md new file mode 100644 index 0000000000..3149579501 --- /dev/null +++ b/CODING_GUIDELINES.md @@ -0,0 +1,292 @@ + +# NeMo-RL Coding Guidelines + +Note: This repository is Python-first. Prefer the Python guidelines in this document. + +## Style Guides We Follow + +- Python: [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html) +- Shell: [Google Shell Style Guide](https://google.github.io/styleguide/shellguide.html) + +## uv Guidelines + +### Use uv run instead of python + +Use `uv run` to execute scripts, rather than activating a virtual environment and calling `python` directly. + +Don't: + +```bash +source .venv/bin/activate +python examples/run_grpo_math.py +``` + +Do: + +```bash +uv run examples/run_grpo_math.py +``` + +Exception: `Dockerfile.ngc_pytorch` is exempt from this rule. + +## Python Coding Guidelines +### Python Standard +1. The code developed for NeMo RL should conform to Python 3.12+. + +### Indentation +1. Indent code with 4 spaces. Do not use tabs. + +### Naming + +#### Identifier Format +1. Files +- snake_case: `some_file.py` + +2. Classes +- PascalCase: `class SomeClass` + +3. Functions and Methods +- snake_case: `def my_awesome_function():` + +4. 
Local Variables
+- snake_case: `my_variable = ...`
+- prefix `k` for variable names that start with a number: `k_99th_percentile = ...`
+
+5. Global Variables
+- upper snake_case and prefix `G`: `G_MY_GLOBAL = ...`
+
+6. Constants
+- upper snake_case: `MY_CONSTANT = ...`
+
+#### Identifier Guidelines
+1. Avoid shadowing variables declared in an outer scope.
+2. Initialize all externally visible members of a class in the constructor.
+
+### Comments
+
+1. For interfaces that may be used outside a file, prefer docstrings over comments.
+2. Comments should be reserved for code within a function, or interfaces that are local to a file.
+3. If a piece of code is commented out, there should be a comment around that piece of code describing its usage and why it's commented out. Otherwise that is a debug comment and it should be removed before merging.
+
+### Docstring Syntax
+#### Classes and Functions
+Use the [Google style](https://google.github.io/styleguide/pyguide.html), which can be parsed by Sphinx.
+
+### Avoid Reflection
+Avoid using reflection when functionality can be easily achieved without reflection.
+
+For example, instead of:
+
+```python
+def make_complex(*args):
+    x, y = args
+    return dict(**locals())
+```
+
+Do:
+
+```python
+def make_complex(x, y):
+    return {'x': x, 'y': y}
+```
+
+### Error Handling
+1. When using try-except blocks, limit the except to the smallest set of errors possible.
+
+For example, instead of:
+
+```python
+try:
+    open(path, "r").read()
+except:
+    print("Failed to open file")
+```
+
+Do:
+
+```python
+try:
+    open(path, "r").read()
+except FileNotFoundError:
+    print("Failed to open file")
+```
+
+
+2. When using try-except blocks to handle multiple possible variable types (i.e. duck-typing), keep the body of the try as small as possible, using the else block to implement the logic.
+
+For example, instead of:
+
+```python
+try:
+    f.seek(0)
+    f.read()
+except AttributeError:
+    ... 
# Not a file-like object, do something else +``` + +Do: + +```python +try: + f.seek # Do not call to minimize chance of unrelated failure +except AttributeError: + ... # Not a file-like object, do something else +else: + f.seek(0) + f.read() +``` + +### Configuration Defaults + +- **YAML is the single source of truth for defaults.** Do not set non-`None` defaults in the code for configuration values. The loaded YAML (and any user overrides) must supply required values. +- **Access config directly and expect presence.** For required attributes, write code like `policy_cfg["precision"]` and assume it is present. Do not introduce hidden defaults deep in the code (e.g., defaulting `policy.precision` to `"bfloat16"`). +- **Express optionality via `TypedDict`.** Use `typing.NotRequired` to mark optional attributes. Optional attributes may be absent/`None`; code may check for their presence. +- **Where defaults live.** Exemplar configs under `examples/configs/*.yaml` include documented defaults. Recipe YAMLs under `examples/configs/recipes/**/*.yaml` are runnable snapshots and may omit documentation. +- **Additions must be documented.** When adding a new config key to a `TypedDict` subclass, document the key’s purpose, valid values/types, and recommended default (if applicable), and reflect the default in the exemplar YAMLs under `examples/configs/*.yaml`. +- **Rationale.** Centralizing defaults in YAML avoids surprising behavior and makes value provenance clear. + +Forbidden patterns: + +```python +# Hidden default in code +precision = policy_cfg.get("precision", "bfloat16") + +# Function parameter defaulting a config value +def build_policy(policy_cfg, precision: str = "bfloat16"): + ... 
+``` + +Preferred patterns: + +```python +# Required attribute: expect it to come from YAML or user override +precision: str = policy_cfg["precision"] + +# Optional attribute: check for presence +if "milestones" in scheduler_cfg: + configure_milestones(scheduler_cfg["milestones"]) +``` + +See also: [TypedDict and Configuration Defaults](docs/design-docs/design-and-philosophy.md#typeddict-and-configuration-defaults). + +## Doc Guidelines + +### Ensure docs/index.md is up to date + +When a new markdown doc is added under `docs/**/*.md` or a markdown file is renamed, ensure that `docs/index.md` is updated and the document appears in the most appropriate section. + +## Tests + +### Coverage and Ray Actors + +- For any source file under `nemo_rl/*.py` that defines a class or function decorated with `@ray.remote`, add a coverage pragma because these run in separate Ray processes and are not reliably tracked by coverage. +- Place `# pragma: no cover` on the `class` or `def` line (and on any remote functions), for example: + +```python +import ray + +@ray.remote # pragma: no cover +class RolloutActor: + def run(self) -> None: + ... + +@ray.remote # pragma: no cover +def remote_eval(batch): + ... +``` + +### Nightly Tests for New Model Support + +When adding support for a new model, add a corresponding nightly test consisting of: + +1) Recipe YAML under `examples/configs/recipes/` +- Place the YAML in the appropriate domain subdirectory (e.g., `examples/configs/recipes/llm/` or `examples/configs/recipes/vlm/`). +- Name it following our recipe naming rules (see below). The YAML filename should mirror the driver script name but with `.yaml`. + +2) Driver script under `tests/test_suites/` +- Create a shell script in the matching domain (e.g., `tests/test_suites/llm/` or `tests/test_suites/vlm/`). +- The script should source any common environment (e.g., `common.env`) and invoke the training entrypoint with `uv run ... --config <path-to-yaml>` as appropriate. 
+- Match the driver script filename to the YAML base name, with `.sh`. + +3) Add to nightly list +- Append the driver script path (relative to `tests/test_suites/`) to `tests/test_suites/nightly.txt`. + +### Recipe Naming Rules (YAML and Driver Scripts) + +Base pattern (LLM): + +``` +<algo>-<model>-<nodes>n<gpus>g-<strategy-and-params>[-modifiers][-long][.vN].(yaml|sh) +``` + +- **algo**: task or algorithm, e.g., `sft`, `dpo`, `grpo`. +- **model**: model identifier, e.g., `llama3.1-8b-instruct`, `qwen2.5-7b-instruct`. +- **nodes/gpus**: cluster allocation, e.g., `1n8g`, `4n8g`, `8n8g`. +- **strategy-and-params**: parallelism or framework detail, e.g., `fsdp2tp1`, `tp4pp2`, `megatron`, `dtensor2tp1`. +- **modifiers** (optional): short flags like `sp` (sequence packing), `actckpt` (activation checkpointing), `fp8`, `noncolocated`, `quick`. +- **-long** (optional): indicates long-running recipe. +- **.vN** (optional): version suffix (e.g., `.v2`, `.v3`) reserved for convergence-impacting changes. + +Examples (from current tree): + +``` +sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml +dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.yaml +grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.yaml +grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +``` + +VLM pattern: + +``` +vlm_<algo>-<model>-<nodes>n<gpus>g-<strategy>[-modifiers][.vN].(yaml|sh) +``` + +Examples: + +``` +vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.yaml +vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v1.sh +``` + +Known exceptions currently present: +- Deepscaler recipes encode context length in place of the cluster tuple, e.g., `grpo-deepscaler-1.5b-8K.(yaml|sh)`. These are allowed but should document the intended hardware in the script body. +- Some recipes include additional short flags in the strategy token (e.g., `fsdp2tp8sp`). Treat these as modifiers appended to the strategy. 
+ +Directory placement: + +``` +examples/configs/recipes/ + llm/ + <name>.yaml + vlm/ + <name>.yaml + +tests/test_suites/ + llm/ + common.env + <name>.sh + vlm/ + common.env + <name>.sh + nightly.txt +``` + +## NVIDIA Copyright + +1. In NeMo-RL, add the following NVIDIA copyright header to all Python files and shell scripts and this header should include the current year. Exclude tests (e.g., files under `tests/` or test-only scripts). The header should appear at the top of the file. +```py +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +``` diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3dc065655a..a830e0a83f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -7,40 +7,95 @@ Thanks for your interest in contributing to Nemo-RL! ### Development Environment 1. **Build and run the Docker container**: -```bash -docker buildx build -t nemo-rl -f Dockerfile . +```sh +docker buildx build -t nemo-rl:latest -f Dockerfile . +``` + +To start a shell in the container to interactively run/develop: +```sh # Run the container with your local nemo-rl directory mounted -docker run -it --gpus all -v /path/to/nemo-rl:/workspace/nemo-rl nemo-rl +docker run -it --gpus all -v /path/to/nemo-rl:/nemo-rl nemo-rl:latest +``` + +If you are using VSCode/Cursor you can also use Dev Containers. 
Here's a devcontainer.json to get you started: +```jsonc +{ + "name": "rl-dev", + "image": "nemo-rl:latest", + "runArgs": [ + "--gpus", + "all", + "--ulimit", + "memlock=-1", + "--ulimit", + "stack=67108864", + "--shm-size=24g", + "--privileged", + "--pid=host" + ] + + // NOTE: Here is an example of how you can set up some common mounts, environment variables, and set up your shell. + // Feel free to adapt to your development workflow and remember to replace the user `terryk` with your username. + + //"mounts": [ + // {"source": "/home/terryk", "target": "/home/terryk", "type": "bind"}, + // {"source": "/home/terryk/.ssh", "target": "/root/terryk-ssh", "type": "bind"} + //], + //"containerEnv": { + // "HF_TOKEN_PATH": "/home/terryk/.cache/huggingface/token", + // "HF_HOME": "/home/terryk/.cache/huggingface", + // "HF_DATASETS_CACHE": "/home/terryk/.cache/huggingface/datasets", + // "WANDB_API_KEY": "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" + //}, + // // This (1) marks all directories safe (2) copies in ssh keys (3) sources user's bashrc file + //"postStartCommand": "git config --global --add safe.directory '*' && cp -r /root/terryk-ssh/* /root/.ssh/ && source /home/terryk/.bashrc" +} ``` ## Making Changes -### Workflow: Clone and Branch (No Fork Required) +### Workflow: For External Contributors (Fork Required) #### Before You Start: Install pre-commit -From the [`nemo-rl` root directory](.), run: -```bash -python3 -m pip install pre-commit -pre-commit install -``` +Pre-commit checks (using `ruff`/`pyrefly`) will help ensure your code follows our formatting and style guidelines. -Pre-commit checks (using `ruff`) will help ensure your code follows our formatting and style guidelines. +If you're an external contributor, you'll need to fork the repository: -We follow a direct clone and branch workflow for now: +1. 
**Create a fork**: Click the "Fork" button on the [GitHub repository page](https://github.com/NVIDIA-NeMo/RL) or follow this direct link: https://github.com/NVIDIA-NeMo/RL/fork -1. Clone the repository directly: +2. **Clone your fork**: ```bash - git clone https://github.com/NVIDIA-NeMo/RL + git clone https://github.com/YOUR-USERNAME/RL nemo-rl cd nemo-rl ``` -2. Create a new branch for your changes: +3. **Add upstream remote** to keep your fork updated: ```bash - git checkout -b your-feature-name + git remote add upstream https://github.com/NVIDIA-NeMo/RL.git ``` -3. Make your changes and commit them: +4. **Install pre-commit**: + ```bash + # Requires `uv` to be installed + uv run --group dev pre-commit install + ``` + +5. **Keep your fork updated** before starting new work: + ```bash + git fetch upstream + git checkout main + git merge upstream/main + git push origin main + ``` + +6. **Create a new branch** for your changes: + ```bash + git checkout main + git switch -c your-feature-name + ``` + +7. **Make your changes and commit** them: ```bash git add . git commit --signoff -m "Your descriptive commit message" @@ -48,12 +103,46 @@ We follow a direct clone and branch workflow for now: We require signing commits with `--signoff` (or `-s` for short). See [Signing Your Work](#signing-your-work) for details. -4. Push your branch to the repository: +8. **Push to your fork**: + ```bash + git push origin your-feature-name + ``` + +9. **Create a pull request** from your fork's branch to the main repository's `main` branch through the GitHub web interface. For example, if your GitHub username is `terrykong` and your feature branch is `your-feature-name`, the compare URL would look like: https://github.com/NVIDIA-NeMo/RL/compare/main...terrykong:RL:your-feature-name?expand=1 + +### Workflow: For NVIDIA Contributors (Direct Access) + +If you have write access to the repository (NVIDIA contributors): + +1. 
Clone the repository directly: + ```bash + git clone https://github.com/NVIDIA-NeMo/RL nemo-rl + cd nemo-rl + ``` + +2. **Install pre-commit** from the [`nemo-rl` root directory](.): + ```bash + # Requires `uv` to be installed + uv run --group dev pre-commit install + ``` + +3. Create a new branch for your changes: + ```bash + git switch -c your-feature-name + ``` + +4. Make your changes and commit them: + ```bash + git add . + git commit --signoff -m "Your descriptive commit message" + ``` + +5. Push your branch to the repository: ```bash - git push origin feature/your-feature-name + git push origin your-feature-name ``` -5. Create a pull request from your branch to the `main` branch. +6. Create a pull request from your branch to the `main` branch. ### Design Documentation Requirement diff --git a/README.md b/README.md index 780057ad0c..1fc6aa43de 100644 --- a/README.md +++ b/README.md @@ -1,80 +1,164 @@ -# Nemo RL: A Scalable and Efficient Post-Training Library +# NeMo RL: A Scalable and Efficient Post-Training Library -<!-- markdown all in one --> -- [Nemo RL: A Scalable and Efficient Post-Training Library](#nemo-rl-a-scalable-and-efficient-post-training-library) - - [📣 News](#-news) - - [Features](#features) - - [Prerequisites](#prerequisites) - - [Training Backends](#training-backends) - - [GRPO](#grpo) - - [GRPO Single Node](#grpo-single-node) - - [GRPO Multi-node](#grpo-multi-node) - - [GRPO Qwen2.5-32B](#grpo-qwen25-32b) - - [GRPO Multi-Turn](#grpo-multi-turn) - - [Supervised Fine-Tuning (SFT)](#supervised-fine-tuning-sft) - - [SFT Single Node](#sft-single-node) - - [SFT Multi-node](#sft-multi-node) - - [DPO](#dpo) - - [DPO Single Node](#dpo-single-node) - - [DPO Multi-node](#dpo-multi-node) - - [Evaluation](#evaluation) - - [Convert Model Format (Optional)](#convert-model-format-optional) - - [Run Evaluation](#run-evaluation) - - [Set Up Clusters](#set-up-clusters) - - [Tips and Tricks](#tips-and-tricks) - - [Citation](#citation) - - 
[Contributing](#contributing) - - [Licenses](#licenses) - -**Nemo RL** is a scalable and efficient post-training library designed for models ranging from 1 GPU to thousands, and from tiny to over 100 billion parameters. - -What you can expect: - -- **Seamless integration with Hugging Face** for ease of use, allowing users to leverage a wide range of pre-trained models and tools. -- **High-performance implementation with Megatron Core**, supporting various parallelism techniques for large models (>100B) and large context lengths. -- **Efficient resource management using Ray**, enabling scalable and flexible deployment across different hardware configurations. -- **Flexibility** with a modular design that allows easy integration and customization. -- **Comprehensive documentation** that is both detailed and user-friendly, with practical examples. +[![CICD NeMo RL](https://github.com/NVIDIA-NeMo/RL/actions/workflows/cicd-main.yml/badge.svg?branch=main&event=schedule)](https://github.com/NVIDIA-NeMo/RL/actions/workflows/cicd-main.yml) ## 📣 News +* [10/10/2025] **DAPO Algorithm Support** + NeMo RL now supports [Decoupled Clip and Dynamic Sampling Policy Optimization (DAPO)](https://arxiv.org/pdf/2503.14476) algorithm. + DAPO extends GRPO with **Clip-Higher**, **Dynamic Sampling**, **Token-Level Policy Gradient Loss**, and **Overlong Reward Shaping** for more stable and efficient RL training. See the [DAPO guide](docs/guides/dapo.md) for more details. +* [9/30/2025][Accelerated RL on GCP with NeMo RL!](https://discuss.google.dev/t/accelerating-reinforcement-learning-on-google-cloud-using-nvidia-nemo-rl/269579/4) +* [9/27/2025] [FP8 Quantization in NeMo RL](https://github.com/NVIDIA-NeMo/RL/discussions/1216) +* [9/25/2025] On-policy Distillation + * Student generates on-policy sequences and aligns logits to a larger teacher via KL, achieving near-larger-model quality at lower cost than RL. See [On-policy Distillation](#on-policy-distillation). 
+ +<details> +<summary>Previous News</summary> + +* [8/15/2025] [NeMo-RL: Journey of Optimizing Weight Transfer in Large MoE Models by 10x](https://github.com/NVIDIA-NeMo/RL/discussions/1189) +* [7/31/2025] [NeMo-RL V0.3: Scalable and Performant Post-training with Nemo-RL via Megatron-Core](https://github.com/NVIDIA-NeMo/RL/discussions/1161) * [7/25/2025] [Release v0.3.0!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.3.0) + * 📝 [v0.3.0 Announcement](https://github.com/NVIDIA-NeMo/RL/discussions/1161) * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/15kpesCV1m_C5UQFStssTEjaN2RsBMeZ0?usp=sharing) to get a head start on your experimentation. + * [5/14/2025] [Reproduce DeepscaleR with NeMo RL!](docs/guides/grpo-deepscaler.md) * [5/14/2025] [Release v0.2.1!](https://github.com/NVIDIA-NeMo/RL/releases/tag/v0.2.1) * 📊 View the release run metrics on [Google Colab](https://colab.research.google.com/drive/1o14sO0gj_Tl_ZXGsoYip3C0r5ofkU1Ey?usp=sharing) to get a head start on your experimentation. +</details> + +## Overview + +**NeMo RL** is an open-source post-training library under the [NVIDIA NeMo Framework](https://github.com/NVIDIA-NeMo), designed to streamline and scale reinforcement learning methods for multimodal models (LLMs, VLMs etc.). Designed for flexibility, reproducibility, and scale, NeMo RL enables both small-scale experiments and massive multi-GPU, multi-node deployments for fast experimentation in research and production environments. + +![NeMo RL Architecture Diagram](https://raw.githubusercontent.com/NVIDIA-NeMo/RL/refs/heads/main/docs/assets/RL_diagram.png) + +What you can expect: +- **Flexibility** with a modular design that allows easy integration and customization. +- **Efficient resource management using Ray**, enabling scalable and flexible deployment across different hardware configurations. +- **Hackable** with native PyTorch-only paths for quick research prototypes. 
+- **High performance with Megatron Core**, supporting various parallelism techniques for large models and large context lengths. +- **Seamless integration with Hugging Face** for ease of use, allowing users to leverage a wide range of pre-trained models and tools. +- **Comprehensive documentation** that is both detailed and user-friendly, with practical examples. + +Please refer to our [design documents](https://github.com/NVIDIA-NeMo/RL/tree/main/docs/design-docs) for more details on the architecture and design philosophy. + +### Training Backends +NeMo RL supports multiple training backends to accommodate different model sizes and hardware configurations: + +- **DTensor** - PyTorch's next-generation distributed training with improved memory efficiency (PyTorch-native TP, SP, PP, CP, and FSDP2). +- [**Megatron**](https://github.com/NVIDIA-NeMo/Megatron-Bridge) - NVIDIA's high-performance training framework for scaling to large models with 6D parallelisms. + +The training backend is automatically determined based on your YAML configuration settings. For detailed information on backend selection, configuration, and examples, see the [Training Backends documentation](docs/design-docs/training-backends.md). + +### Generation Backends +NeMo RL supports multiple generation/rollout backends to accommodate different model sizes and hardware configurations: + +- [**vLLM**](https://github.com/vllm-project/vllm) - A high-throughput and memory-efficient popular inference and serving engine. +- [**Megatron**](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/inference) - A high-performance Megatron-native inference backend which eliminates weight conversion between training and inference. + +For detailed information on backend selection, configuration, and examples, see the [Generation Backends documentation](docs/design-docs/generation.md). + ## Features ✅ _Available now_ | 🔜 _Coming in v0.4_ -- ✅ **Fast Generation** - vLLM backend for optimized inference. 
-- ✅ **HuggingFace Integration** - Works with 1-70B models (Qwen, Llama).
-- ✅ **Distributed Training** - Fully Sharded Data Parallel (FSDP2) support and Ray-based infrastructure.
-- ✅ **Environment Support** - Support for multi-environment training.
-- ✅ **Learning Algorithms** - GRPO (Group Relative Policy Optimization), SFT (Supervised Fine-Tuning), and DPO (Direct Preference Optimization).
-- ✅ **Multi-Turn RL** - Multi-turn generation and training for RL with tool use, games, etc.
-- ✅ **Large Model Support** - Native PyTorch support for models up to 70B parameters.
-- ✅ **Advanced Parallelism** - PyTorch native FSDP2, TP, CP, and SP for efficient training.
-- ✅ **(even) Larger Model Support with Long(er) Sequences** - Advanced parallelisms with Megatron Core (TP/PP/CP/SP/EP).
+- 🔜 **Megatron Inference** - Megatron Inference for fast Day-0 support for new Megatron models (avoid weight conversion).
+- 🔜 **Async RL** - Support for asynchronous rollouts and replay buffers for off-policy training, and enable a fully asynchronous GRPO.
+- 🔜 **Vision Language Models (VLM)** - Support SFT and GRPO on VLMs through the DTensor path.
+- 🔜 **Improved Native Performance** - Improve training time for native PyTorch models.
+- 🔜 **Improved Large MoE Performance** - Improve Megatron Core training performance and generation performance.
+- 🔜 **End-to-End FP8 Low-Precision Training** - Support for Megatron Core FP8 training and FP8 vLLM generation.
+- 🔜 **Megatron Bridge Integration** - Integrate Megatron Bridge to enable training features from Megatron Core.
+- 🔜 **NeMo Automodel Integration** - Integrate NeMo Automodel to power our DTensor path.
+- 🔜 **New Models** - gpt-oss.
+- 🔜 **Expand Algorithms** - DAPO, GSPO.
+- 🔜 **GB200** - Add container support for GB200.
+- ✅ **Distributed Training** - Ray-based infrastructure.
+- ✅ **Environment Support and Isolation** - Support for multi-environment training and dependency isolation between components. 
- ✅ **Worker Isolation** - Process isolation between RL Actors (no worries about global state). -- ✅ **Environment Isolation** - Dependency isolation between components. -- ✅ **Megatron Inference** - (static) Megatron Inference for day-0 support for new megatron models. -- ✅ **MoE Models** - Support for DeepseekV3 and Qwen-3 MoE models -- ✅ **Sequence Packing** - Sequence packing in both DTensor and MCore for huge training perf gains +- ✅ **Learning Algorithms** - GRPO/GSPO, SFT, DPO, and On-policy distillation. +- ✅ **Multi-Turn RL** - Multi-turn generation and training for RL with tool use, games, etc. +- ✅ **Advanced Parallelism with DTensor** - PyTorch FSDP2, TP, CP, and SP for efficient training. +- ✅ **Larger Model Support with Longer Sequences** - Performant parallelisms with Megatron Core (TP/PP/CP/SP/EP/FSDP). +- ✅ **MoE Models** - Support for DeepSeekV3 and Qwen-3 MoE models (Megatron). +- ✅ **Sequence Packing** - Sequence packing in both DTensor and Megatron Core for huge training performance gains. +- ✅ **Fast Generation** - vLLM backend for optimized inference. +- ✅ **Hugging Face Integration** - Works with 1B to 70B models (Qwen, Llama). 
+ +## Table of Contents + - [Prerequisites](#prerequisites) + - [Quick Start](#quick-start) + - Support Matrix + + <p></p> + + |Algorithms|Single Node|Multi-node| + |-|-|-| + |[GRPO](#grpo)|[GRPO Single Node](#grpo-single-node)|[GRPO Multi-node](#grpo-multi-node): [GRPO Qwen2.5-32B](#grpo-qwen25-32b), [GRPO Multi-Turn](#grpo-multi-turn)| + |[On-policy Distillation](#on-policy-distillation)|[Distillation Single Node](#on-policy-distillation-single-node)|[Distillation Multi-node](#on-policy-distillation-multi-node)| + |[Supervised Fine-Tuning (SFT)](#supervised-fine-tuning-sft)|[SFT Single Node](#sft-single-node)|[SFT Multi-node](#sft-multi-node)| + |[DPO](#dpo)|[DPO Single Node](#dpo-single-node)|[DPO Multi-node](#dpo-multi-node)| + |[RM](#rm)|[RM Single Node](#rm-single-node)|[RM Multi-node](#rm-multi-node)| + + <p></p> + + - [Evaluation](#evaluation) + - [Convert Model Format (Optional)](#convert-model-format-optional) + - [Run Evaluation](#run-evaluation) + - [Set Up Clusters](#set-up-clusters) + - [Tips and Tricks](#tips-and-tricks) + - [Citation](#citation) + - [Contributing](#contributing) + - [Licenses](#licenses) + +## Quick Start + +Use this quick start to get going with either the native PyTorch DTensor or Megatron Core training backends. +> [!NOTE] +> Both training backends are independent — you can install and use either one on its own. -- 🔜 **Improved Native Performance** - Improve training time for Native Pytorch Models. -- 🔜 **Megatron Inference** - (dynamic) Megatron Inference for fast day-0 support for new megatron models. +For more examples and setup details, continue to the [Prerequisites](#prerequisites) section. 
+ +<table style="border-collapse:collapse; width:100%; table-layout:fixed;"> + <thead> + <tr> + <th style="border:1px solid #d0d7de; padding:8px; text-align:left; width:50%; word-break:break-word; overflow-wrap:anywhere; white-space:normal;">Native PyTorch (DTensor)</th> + <th style="border:1px solid #d0d7de; padding:8px; text-align:left; width:50%; word-break:break-word; overflow-wrap:anywhere; white-space:normal;">Megatron Core</th> + </tr> + </thead> + <tbody> + <tr> + <td colspan="2" style="border:1px solid #d0d7de; padding:8px; vertical-align:top; word-break:break-word; overflow-wrap:anywhere; white-space:normal;"> + <strong>Clone and create the environment</strong> + <pre style="white-space:pre-wrap; word-break:break-word; overflow-wrap:anywhere;"><code class="language-sh">git clone git@github.com:NVIDIA-NeMo/RL.git nemo-rl --recursive +cd nemo-rl +uv venv</code></pre> + <em>Note:</em> If you previously ran without checking out the submodules, you may need to rebuild virtual environments by setting <code>NRL_FORCE_REBUILD_VENVS=true</code>. See <a href="#tips-and-tricks">Tips and Tricks</a>. + </td> + </tr> + <tr> + <td style="border:1px solid #d0d7de; padding:8px; vertical-align:top; word-break:break-word; overflow-wrap:anywhere; white-space:normal;"> + <strong>Run GRPO (DTensor)</strong> + <pre style="white-space:pre-wrap; word-break:break-word; overflow-wrap:anywhere;"><code class="language-sh">uv run python examples/run_grpo_math.py</code></pre> + </td> + <td style="border:1px solid #d0d7de; padding:8px; vertical-align:top; word-break:break-word; overflow-wrap:anywhere; white-space:normal;"> + <strong>Run GRPO (Megatron)</strong> + <pre style="white-space:pre-wrap; word-break:break-word; overflow-wrap:anywhere;"><code class="language-sh">uv run examples/run_grpo_math.py \ +--config examples/configs/grpo_math_1B_megatron.yaml</code></pre> + </td> + </tr> + </tbody> +</table> ## Prerequisites Clone **NeMo RL**. 
```sh -git clone git@github.com:NVIDIA-NeMo/RL.git nemo-rl +git clone git@github.com:NVIDIA-NeMo/RL.git nemo-rl --recursive cd nemo-rl -# If you are using the Megatron backend, download the pinned versions of Megatron-LM and NeMo submodules -# by running (This is not necessary if you are using the pure Pytorch/DTensor path): +# If you have already cloned without the --recursive option, you can initialize the submodules recursively git submodule update --init --recursive # Different branches of the repo can have different pinned versions of these third-party submodules. Ensure @@ -85,8 +169,8 @@ git submodule update --init --recursive # You will have to run the full `git submodule update --init --recursive` command in these situations. ``` -If you are using the Megatron backend on bare-metal (outside of a container), you may -need to install the cudnn headers as well. Here is how you can check as well as install them: +If you are using the Megatron backend on bare metal (outside of a container), you may +need to install the cuDNN headers as well. Here is how you check and install them: ```sh # Check if you have libcudnn installed dpkg -l | grep cudnn.*cuda @@ -95,57 +179,41 @@ dpkg -l | grep cudnn.*cuda # As an example, these are the "Linux Ubuntu 20.04 x86_64" instructions wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb -sudo apt-get update -sudo apt-get install cudnn-cuda-12 +sudo apt update +sudo apt install cudnn # Will install cuDNN meta packages which point to the latest versions +# sudo apt install cudnn9-cuda-12 # Will install cuDNN version 9.x.x compiled for CUDA 12.x +# sudo apt install cudnn9-cuda-12-8 # Will install cuDNN version 9.x.x compiled for CUDA 12.8 ``` -Install `uv`. +If you encounter problems when installing vllm's dependency deep_ep on bare metal (outside of a container), you may need to install libibverbs-dev as well. 
Here is how you can install it: ```sh -# For faster setup and environment isolation, we use `uv` -pip install uv - -# Initialize NeMo RL project virtual environment -# NOTE: Please do not use -p/--python and instead allow uv venv to read it from .python-version -# This ensures that the version of python used is always what we prescribe. -uv venv - -# If working outside a container, it can help to build flash-attn and warm the -# uv cache before your first run. The NeMo RL Dockerfile will warm the uv cache -# with flash-attn. See https://docs.nvidia.com/nemo/rl/latest/docker.html for -# instructions if you are looking for the NeMo RL container. -bash tools/build-flash-attn-in-uv-cache.sh -# If sucessful, you should see "✅ flash-attn successfully added to uv cache" - -# If you cannot install at the system level, you can install for your user with -# pip install --user uv - -# Use `uv run` to launch all commands. It handles pip installing implicitly and -# ensures your environment is up to date with our lock file. - -# Note that it is not recommended to activate the venv and instead use `uv run` since -# it ensures consistent environment usage across different shells and sessions. -# Example: uv run python examples/run_grpo_math.py +sudo apt-get update +sudo apt-get install libibverbs-dev ``` -**Important Notes:** - -- Use the `uv run <command>` to execute scripts within the managed environment. This helps maintain consistency across different shells and sessions. -- Ensure you have the necessary CUDA drivers and PyTorch installed compatible with your hardware. -- On the first install, `flash-attn` can take a while to install (~45min with 48 CPU hyperthreads). After it is built once, it is cached in your `uv`'s cache dir making subsequent installs much quicker. -- **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models. 
- -## Training Backends +For faster setup and environment isolation, we use [uv](https://docs.astral.sh/uv/). +Follow [these instructions](https://docs.astral.sh/uv/getting-started/installation/) to install uv. -NeMo RL supports multiple training backends to accommodate different model sizes and hardware configurations: +Then, initialize the NeMo RL project virtual environment via: +```sh +uv venv +``` +> [!NOTE] +> Please do not use `-p/--python` and instead allow `uv venv` to read it from `.python-version`. +> This ensures that the version of python used is always what we prescribe. -- **DTensor (FSDP2)** - PyTorch's next-generation distributed training with improved memory efficiency -- **Megatron** - NVIDIA's high-performance training framework for scaling to large models (>100B parameters) +Use `uv run` to launch all commands. It handles pip installing implicitly and ensures your environment is up to date with our lock file. +> [!NOTE] +> - It is not recommended to activate the `venv`, and you should use `uv run <command>` instead to execute scripts within the managed environment. +> This ensures consistent environment usage across different shells and sessions. Example: `uv run python examples/run_grpo_math.py` +> - Ensure your system has the appropriate CUDA drivers installed, and that your PyTorch version is compatible with both your CUDA setup and hardware. +> - If you update your environment in `pyproject.toml`, it is necessary to force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run. +> - **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models. -The training backend is automatically determined based on your YAML configuration settings. 
For detailed information on backend selection, configuration, and examples, see the [Training Backends documentation](docs/design-docs/training-backends.md). ## GRPO -We have a reference GRPO experiment config set up trained for math benchmarks using the [OpenInstructMath2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2) dataset. +We provide a reference GRPO configuration for math benchmarks using the [OpenInstructMath2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2) dataset. You can read about the details of the GRPO implementation [here](docs/guides/grpo.md) @@ -166,7 +234,7 @@ uv run python examples/run_grpo_math.py \ cluster.gpus_per_node=8 ``` -You can override any of the parameters listed in the yaml configuration file. For example, +You can override any of the parameters listed in the YAML configuration file. For example, ```sh uv run python examples/run_grpo_math.py \ @@ -243,9 +311,49 @@ Reference example for training to play a Sliding Puzzle Game: uv run python examples/run_grpo_sliding_puzzle.py ``` +## On-policy Distillation + +We provide an example on-policy distillation experiment using the [DeepScaler dataset](https://huggingface.co/agentica-org/DeepScaleR-1.5B-Preview). + +### On-policy Distillation Single Node + +To run on-policy distillation on a single GPU using `Qwen/Qwen3-1.7B-Base` as the student and `Qwen/Qwen3-4B` as the teacher: + +```sh +uv run python examples/run_distillation_math.py +``` + +Customize parameters with command-line overrides. 
For example: + +```sh +uv run python examples/run_distillation_math.py \ + policy.model_name="Qwen/Qwen3-1.7B-Base" \ + teacher.model_name="Qwen/Qwen3-4B" \ + cluster.gpus_per_node=8 +``` + +### On-policy Distillation Multi-node + +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_distillation_math.py --config examples/configs/distillation_math.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/distill_2nodes' logger.wandb_enabled=True logger.wandb.name='distill-2nodes'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + ## Supervised Fine-Tuning (SFT) -We provide an example SFT experiment using the [SQuAD dataset](https://rajpurkar.github.io/SQuAD-explorer/). +We provide example SFT experiments using various datasets including [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), OpenAI format datasets (with tool calling support), and custom JSONL datasets. For detailed documentation on supported datasets and configurations, see the [SFT documentation](docs/guides/sft.md). ### SFT Single Node @@ -300,7 +408,7 @@ The default DPO experiment is configured to run on a single GPU. To launch the e uv run python examples/run_dpo.py ``` -This trains `Llama3.2-1B-Instruct` on one GPU. +This trains `Llama3.2-1B-Instruct` on 1 GPU. If you have access to more GPUs, you can update the experiment accordingly. 
To run on 8 GPUs, we update the cluster configuration and switch to an 8B Llama3.1 Instruct model: @@ -334,7 +442,50 @@ For distributed DPO training across multiple nodes, modify the following script NUM_ACTOR_NODES=2 COMMAND="uv run ./examples/run_dpo.py --config examples/configs/dpo.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 dpo.val_global_batch_size=32 checkpointing.checkpoint_dir='results/dpo_llama81_2nodes' logger.wandb_enabled=True logger.wandb.name='dpo-llama1b'" \ -RAY_DEDUP_LOGS=0 \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + +## RM + +We provide a sample RM experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training. + +### RM Single Node + +The default RM experiment is configured to run on a single GPU. To launch the experiment: + +```sh +uv run python examples/run_rm.py +``` + +This trains a RM based on `meta-llama/Llama-3.2-1B-Instruct` on 1 GPU. + +If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration: + +```sh +uv run python examples/run_rm.py cluster.gpus_per_node=8 +``` + +Refer to the [RM documentation](docs/guides/rm.md) for more information. 
+ +### RM Multi-node + +For distributed RM training across multiple nodes, modify the following script for your use case: + +```sh +# Run from the root of NeMo RL repo +## number of nodes to use for your job +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_rm.py --config examples/configs/rm.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/rm_llama1b_2nodes' logger.wandb_enabled=True logger.wandb.name='rm-llama1b-2nodes'" \ CONTAINER=YOUR_CONTAINER \ MOUNTS="$PWD:$PWD" \ sbatch \ @@ -353,7 +504,7 @@ We provide evaluation tools to assess model capabilities. ### Convert Model Format (Optional) -If you have trained a model and saved the checkpoint in the Pytorch DCP format, you first need to convert it to the Hugging Face format before running evaluation: +If you have trained a model and saved the checkpoint in the PyTorch DCP format, you first need to convert it to the Hugging Face format before running evaluation: ```sh # Example for a GRPO checkpoint at step 170 @@ -362,19 +513,30 @@ uv run python examples/converters/convert_dcp_to_hf.py \ --dcp-ckpt-path results/grpo/step_170/policy/weights/ \ --hf-ckpt-path results/grpo/hf ``` + +If you have a model saved in Megatron format, you can use the following command to convert it to Hugging Face format prior to running evaluation. This script requires Megatron Core, so make sure you launch with the mcore extra: + +```sh +# Example for a GRPO checkpoint at step 170 +uv run --extra mcore python examples/converters/convert_megatron_to_hf.py \ + --config results/grpo/step_170/config.yaml \ + --megatron-ckpt-path results/grpo/step_170/policy/weights/iter_0000000 \ + --hf-ckpt-path results/grpo/hf +``` + > **Note:** Adjust the paths according to your training output directory structure. For an in-depth explanation of checkpointing, refer to the [Checkpointing documentation](docs/design-docs/checkpointing.md). 
### Run Evaluation -Run evaluation script with converted model: +Run the evaluation script with the converted model: ```sh uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf ``` -Run evaluation script with custom settings: +Run the evaluation script with custom settings: ```sh # Example: Evaluation of DeepScaleR-1.5B-Preview on MATH-500 using 8 GPUs @@ -399,23 +561,41 @@ For detailed instructions on how to set up and launch NeMo RL on Slurm or Kubern ## Tips and Tricks - If you forget to initialize the NeMo and Megatron submodules when cloning the NeMo-RL repository, you may run into an error like this: - + ```sh ModuleNotFoundError: No module named 'megatron' ``` - If you see this error, there is likely an issue with your virtual environments. To fix this, first intialize the submodules: + If you see this error, there is likely an issue with your virtual environments. To fix this, first initialize the submodules: ```sh git submodule update --init --recursive ``` - and then force a rebuild of the virutal environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run: + and then force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run: ```sh NRL_FORCE_REBUILD_VENVS=true uv run examples/run_grpo.py ... ``` +- Large amounts of memory fragmentation might occur when running models without support for FlashAttention2. + If OOM occurs after a few iterations of training, it may help to tweak the allocator settings to reduce memory fragmentation. + To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf) + at **either** one of the following places: + 1. Launch training with: + ```sh + # This will globally apply to all Ray actors + PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64 uv run python examples/run_dpo.py ... + ``` + 2. 
Make the change more permanently by adding this flag in the training configuration: + ```yaml + policy: + # ... + dtensor_cfg: + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "max_split_size_mb:64" + ``` + ## Citation If you use NeMo RL in your research, please cite it using the following BibTeX entry: diff --git a/docker/Dockerfile b/docker/Dockerfile index 828156d039..a6ee5ff931 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,14 @@ +# Usage: +# Self-contained build (default: builds from main): docker buildx build -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . +# Self-contained build (specific git ref): docker buildx build -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push . +# Self-contained build (remote NeMo RL source; no need for a local clone of NeMo RL): docker buildx build -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push https://github.com/NVIDIA-NeMo/RL.git +# Local NeMo RL source override: docker buildx build --build-context nemo-rl=. -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . + ARG BASE_IMAGE=nvcr.io/nvidia/cuda-dl-base:25.05-cuda12.9-devel-ubuntu24.04 +FROM scratch AS nemo-rl +ARG NRL_GIT_REF=main +ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NRL_GIT_REF} / + FROM ${BASE_IMAGE} AS base # It is more convenient for users to run as root @@ -31,7 +41,7 @@ rm -rf /var/lib/apt/lists/* EOF # Install uv and python -ARG UV_VERSION=0.7.2 +ARG UV_VERSION=0.9.7 ARG PYTHON_VERSION=3.12 ENV PATH="/root/.local/bin:$PATH" RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \ @@ -40,6 +50,10 @@ RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | sh && \ # Disable usage stats by default for users who are sensitive to sharing usage. # Users are encouraged to enable if the wish. 
ENV RAY_USAGE_STATS_ENABLED=0 +# After ray>=2.47, this feature is enabled by default which creates uv venvs for any py_executable starting with `uv run`. +# There is severe contention and performance issues with this enabled considering our dependencies are so large and occasionally +# need to be compiled, so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task. +ENV RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 ENV NEMO_RL_VENV_DIR=/opt/ray_venvs @@ -51,24 +65,25 @@ WORKDIR /opt/nemo-rl # setting these to 1. ARG MAX_JOBS ARG NVTE_BUILD_THREADS_PER_JOB +# Only use for custom vllm installs. Learn more at https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md +ARG BUILD_CUSTOM_VLLM ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv ENV UV_LINK_MODE=copy -# This step is to warm the uv cache with flash-attn without invalidating it due to COPY layers -# This layer has to be manually updated -RUN <<"EOF" bash -exu -uv venv ${UV_PROJECT_ENVIRONMENT} - -VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT uv pip install --link-mode symlink setuptools torch==2.7.0 psutil ninja --torch-backend=cu128 -VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT uv pip install --link-mode symlink flash-attn==2.7.4.post1 --no-build-isolation -EOF +# Ensure DeepEP is built for H100 and B200 (also mcore inference unified memory API now invokes a torch API that requires these to be set) +ENV TORCH_CUDA_ARCH_LIST="9.0 10.0" # First copy only the dependency files -COPY pyproject.toml uv.lock ./ -COPY --link 3rdparty/ ./3rdparty/ +COPY --from=nemo-rl pyproject.toml uv.lock ./ +COPY --from=nemo-rl tools/build-custom-vllm.sh ./tools/build-custom-vllm.sh +COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/ RUN <<"EOF" bash -exu +if [[ -n "${BUILD_CUSTOM_VLLM:-}" ]]; then + bash tools/build-custom-vllm.sh + source 3rdparty/vllm/nemo-rl.env +fi # uv sync has a more reliable resolver than simple uv pip install which can fail # Sync each training + inference backend one at a 
time (since they may conflict) @@ -79,6 +94,7 @@ RUN <<"EOF" bash -exu uv sync --link-mode symlink --locked --no-install-project uv sync --link-mode symlink --locked --extra vllm --no-install-project uv sync --link-mode symlink --locked --extra mcore --no-install-project +uv sync --link-mode symlink --locked --extra automodel --no-install-project uv sync --link-mode symlink --locked --all-groups --no-install-project EOF @@ -92,6 +108,8 @@ FROM hermetic AS release ARG NEMO_RL_COMMIT ARG NVIDIA_BUILD_ID ARG NVIDIA_BUILD_REF +ARG RC_DATE=00.00 +ARG TARGETARCH ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-<unknown>} ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-<unknown>} ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-<unknown>} @@ -100,7 +118,13 @@ LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" ENV NEMO_RL_VENV_DIR=/opt/ray_venvs -# Copy in source and prefetch all virtual environments -COPY . /opt/nemo-rl +# Copy in source from build context (defaults to cloned repo, can be overridden) +COPY --from=nemo-rl . /opt/nemo-rl +# Unshallow the repo to get the full history (in the case it was from the scratch layer). +# Potentially not necessary if the repo is passed in as a complete repository (w/ full git history), +# so do a quick check before trying to unshallow. 
+RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true RUN UV_LINK_MODE=symlink uv run nemo_rl/utils/prefetch_venvs.py - +# NOTICES.txt file points to where the OSS source code is archived +RUN echo "This distribution includes open source which is archived at the following URL: https://opensource.nvidia.com/oss/teams/nvidia/nemo-rl/${RC_DATE}:linux-${TARGETARCH}/index.html" > NOTICES.txt && \ + echo "For further inquiries or assistance, contact us at oss-requests@nvidia.com" >> NOTICES.txt \ No newline at end of file diff --git a/docker/Dockerfile.ngc_pytorch b/docker/Dockerfile.ngc_pytorch new file mode 100644 index 0000000000..1f26ea8015 --- /dev/null +++ b/docker/Dockerfile.ngc_pytorch @@ -0,0 +1,143 @@ +# This Dockerfile is used to build a Docker image for NeMo RL with the NGC PyTorch base image. +# However, it is still a work in progress and is not yet ready for production use. +# +# Usage: +# Self-contained build (default: builds from main): docker buildx build -f docker/Dockerfile.ngc_pytorch --tag <registry>/nemo-rl:latest --push . +# Self-contained build (specific git ref): docker buildx build -f docker/Dockerfile.ngc_pytorch --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push . +# Self-contained build (remote NeMo RL source; no need for a local clone of NeMo RL): docker buildx build -f docker/Dockerfile.ngc_pytorch --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push https://github.com/NVIDIA-NeMo/RL.git +# Local NeMo RL source override: docker buildx build --build-context nemo-rl=. -f docker/Dockerfile.ngc_pytorch --tag <registry>/nemo-rl:latest --push . 
+# +# If installing new dependencies in the container, then use "uv pip install new-dependency" +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3 +FROM scratch AS nemo-rl +ARG NRL_GIT_REF=main +ADD --keep-git-dir=true https://github.com/NVIDIA-NeMo/RL.git#${NRL_GIT_REF} / + +FROM ${BASE_IMAGE} AS base + +# It is more convenient for users to run as root +USER root + +RUN <<"EOF" bash -exu -o pipefail +export DEBIAN_FRONTEND=noninteractive +export TZ=America/Los_Angeles + +apt-get update +apt-get install -y --no-install-recommends \ + jq \ + curl \ + git \ + rsync \ + wget \ + less \ + vim \ + + +apt-get clean +rm -rf /var/lib/apt/lists/* +EOF + +# Install uv at /usr/local/bin in case the root home directory is bind mounted +ARG UV_VERSION=0.9.7 +RUN curl -LsSf https://astral.sh/uv/${UV_VERSION}/install.sh | XDG_BIN_HOME=/usr/local/bin sh + +# Disable usage stats by default for users who are sensitive to sharing usage. +# Users are encouraged to enable if they wish. +ENV RAY_USAGE_STATS_ENABLED=0 +# After ray>=2.47, this feature is enabled by default which creates uv venvs for any py_executable starting with `uv run`. +# There is severe contention and performance issues with this enabled considering our dependencies are so large and occasionally +# need to be compiled, so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task. 
+ENV RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 +ENV NEMO_RL_VENV_DIR=/opt/ray_venvs + +# Build vLLM from source to use with the NVIDIA PyTorch base image +FROM base AS build_vllm + +ARG MAX_JOBS=32 +WORKDIR /opt +COPY --from=nemo-rl uv.lock /tmp/uv.lock + +RUN <<"EOF" bash -exu +echo "Building vLLM from source for PyTorch base image" +VLLM_VERSION=$(grep -A 1 'name = "vllm"' /tmp/uv.lock | grep 'version =' | sed 's/version = "\(.*\)"/\1/') && \ +echo "Building vLLM version: $VLLM_VERSION" +git clone https://github.com/vllm-project/vllm.git +cd vllm +git checkout v$VLLM_VERSION +python use_existing_torch.py +pip install -r requirements/build.txt +pip wheel --no-deps --no-build-isolation -v . +EOF + +FROM base AS hermetic + +WORKDIR /opt/nemo-rl + +# Variables to control the build of TE. If there are issues with parallelization, consider +# setting these to 1. +ARG MAX_JOBS +ARG NVTE_BUILD_THREADS_PER_JOB + +ENV UV_PROJECT_ENVIRONMENT=/opt/nemo_rl_venv +ENV UV_CACHE_DIR=/opt/uv_cache +ENV UV_LINK_MODE=copy + +# Define the no-install-package arguments for PyTorch base images +ARG BASE_IMAGE +ARG UV_NO_INSTALL_PACKAGES="--no-install-package torch --no-install-package torchvision --no-install-package triton --no-install-package nvidia-cublas-cu12 --no-install-package nvidia-cuda-cupti-cu12 --no-install-package nvidia-cuda-nvrtc-cu12 --no-install-package nvidia-cuda-runtime-cu12 --no-install-package nvidia-cudnn-cu12 --no-install-package nvidia-cufft-cu12 --no-install-package nvidia-cufile-cu12 --no-install-package nvidia-curand-cu12 --no-install-package nvidia-cusolver-cu12 --no-install-package nvidia-cusparse-cu12 --no-install-package nvidia-cusparselt-cu12 --no-install-package nvidia-nccl-cu12 --no-install-package vllm --no-install-package flash-attn --no-install-package transformer-engine --no-install-package transformer-engine-cu12 --no-install-package transformer-engine-torch --no-install-package numpy" +ENV UV_NO_INSTALL_PACKAGES=${UV_NO_INSTALL_PACKAGES} +ENV 
PATH="/opt/nemo_rl_venv/bin:$PATH" +# Ensure DeepEP is built for H100 and B200 +ENV TORCH_CUDA_ARCH_LIST="9.0 10.0" + +# First copy only the dependency files +COPY --from=nemo-rl pyproject.toml uv.lock ./ +COPY --from=nemo-rl --link 3rdparty/ ./3rdparty/ + + +RUN --mount=type=bind,from=build_vllm,source=/opt/,target=/tmp/build_vllm/ <<"EOF" bash -exu +# Remove torch requirements from extra-build-dependencies for build with NGC PyTorch base image +sed -i 's/= \[{ requirement = "torch", match-runtime = true }\]/= []/g' pyproject.toml + +# uv sync has a more reliable resolver than simple uv pip install which can fail +# The venv is symlinked to avoid bloating the layer size +uv venv --system-site-packages ${UV_PROJECT_ENVIRONMENT} +uv pip install --no-cache-dir --no-deps /tmp/build_vllm/vllm/vllm*.whl +# Ensure nvshmem is installed before building DeepEP +uv sync --link-mode symlink --locked --inexact --no-install-project $UV_NO_INSTALL_PACKAGES +uv sync --link-mode symlink --locked --inexact --extra vllm --extra mcore --extra automodel --all-groups --no-install-project $UV_NO_INSTALL_PACKAGES +EOF + +ENV NEMO_RL_VENV_DIR=/opt/ray_venvs + +WORKDIR /opt/nemo-rl + +FROM hermetic AS release + +ARG NEMO_RL_COMMIT +ARG NVIDIA_BUILD_ID +ARG NVIDIA_BUILD_REF +ARG RC_DATE=00.00 +ARG TARGETARCH +ENV UV_NO_SYNC=1 +ENV NEMO_RL_COMMIT=${NEMO_RL_COMMIT:-<unknown>} +ENV NVIDIA_BUILD_ID=${NVIDIA_BUILD_ID:-<unknown>} +ENV NVIDIA_BUILD_REF=${NVIDIA_BUILD_REF:-<unknown>} +ENV NEMO_RL_PY_EXECUTABLES_SYSTEM=1 +# The 25.06 Pytorch container is not compatible with vllm standalone compile so we disable it +ENV VLLM_USE_STANDALONE_COMPILE=0 +LABEL com.nvidia.build.id="${NVIDIA_BUILD_ID}" +LABEL com.nvidia.build.ref="${NVIDIA_BUILD_REF}" + +ENV NEMO_RL_VENV_DIR=/opt/ray_venvs + +# Copy in source from build context (defaults to cloned repo, can be overridden) +COPY --from=nemo-rl . /opt/nemo-rl +# Unshallow the repo to get the full history (in the case it was from the scratch layer). 
+# Potentially not necessary if the repo is passed in as a complete repository (w/ full git history), +# so do a quick check before trying to unshallow. +RUN git rev-parse --is-shallow-repository | grep -q true && git fetch --unshallow || true +RUN UV_LINK_MODE=symlink uv sync --locked --inexact $UV_NO_INSTALL_PACKAGES +# NOTICES.txt file points to where the OSS source code is archived +RUN echo "This distribution includes open source which is archived at the following URL: https://opensource.nvidia.com/oss/teams/nvidia/nemo-rl/${RC_DATE}:linux-${TARGETARCH}/index.html" > NOTICES.txt && \ + echo "For further inquiries or assistance, contact us at oss-requests@nvidia.com" >> NOTICES.txt \ No newline at end of file diff --git a/docker/README.md b/docker/README.md index b21c3e7401..66b1da6855 100644 --- a/docker/README.md +++ b/docker/README.md @@ -3,8 +3,8 @@ NOTE: *We use `docker buildx` instead of `docker build` for these containers* This directory contains the `Dockerfile` for NeMo-RL Docker images. You can build two types of images: -- A **base image**: A minimal image where Python dependencies can be specified at runtime. -- A **hermetic image**: An image that includes default dependencies for offline use. +- A **release image** (recommended): Contains everything from the hermetic image, plus the nemo-rl source code and pre-fetched virtual environments for isolated workers. +- A **hermetic image**: Includes the base image plus pre-fetched NeMo RL python packages in the `uv` cache. For detailed instructions on building these images, please see [docs/docker.md](../docs/docker.md). 
\ No newline at end of file diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000000..a9a5be4d3d --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,83 @@ +# Makefile for building documentation with isolated UV environment + +.DEFAULT_GOAL := help + +# Cross-platform venv paths +ifeq ($(OS),Windows_NT) + VENV_DIR = .venv/Scripts + PYTHON = $(VENV_DIR)/python.exe + ACTIVATE_CMD = .venv\Scripts\activate + RM = if exist _build rmdir /s /q _build +else + VENV_DIR = .venv/bin + PYTHON = $(VENV_DIR)/python + ACTIVATE_CMD = source .venv/bin/activate + RM = rm -rf _build +endif + +# ------------------------------ +# Help +# ------------------------------ +help: + @echo "" + @echo "📚 Documentation Build System" + @echo "==============================" + @echo "" + @echo "Available targets:" + @echo " make docs-html Build HTML documentation" + @echo " make docs-live Start live-reload server" + @echo " make docs-publish Build docs (fail on warnings)" + @echo " make docs-clean Clean built documentation" + @echo "" + @echo "The environment is automatically set up on first run." + @echo "To manually activate the docs environment, run:" + @echo " $(ACTIVATE_CMD)" + @echo "" + +# ------------------------------ +# Ensure UV and isolated docs environment +# ------------------------------ +ensure-docs-env: + @command -v uv >/dev/null 2>&1 || ( \ + echo ""; \ + echo "❌ uv is not installed. See https://docs.astral.sh/uv/getting-started/installation/"; \ + exit 1 \ + ) + @if [ ! -x "$(PYTHON)" ]; then \ + echo "📦 Creating isolated docs environment..."; \ + uv venv .venv; \ + uv sync --no-config; \ + echo "✅ Docs environment ready."; \ + echo "📝 To activate it: $(ACTIVATE_CMD)"; \ + fi + +# ------------------------------ +# Build HTML docs +# ------------------------------ +docs-html: ensure-docs-env + @echo "Building HTML documentation..." + $(PYTHON) -m sphinx -b html . 
_build/html + +# ------------------------------ +# Build docs for publication (fail on warnings) +# ------------------------------ +docs-publish: ensure-docs-env + @echo "Building HTML documentation (fail on warnings)..." + $(PYTHON) -m sphinx --fail-on-warning -b html . _build/html + +# ------------------------------ +# Start live-reload server +# ------------------------------ +docs-live: ensure-docs-env + @echo "Starting live-reload server..." + $(PYTHON) -m sphinx_autobuild . _build/html --port 8001 + @echo "" + @echo "📝 To manually activate the docs environment in a shell:" + @echo " $(ACTIVATE_CMD)" + +# ------------------------------ +# Clean built docs +# ------------------------------ +docs-clean: + @echo "Cleaning built documentation..." + $(RM) diff --git a/docs/about/algorithms/dapo.md b/docs/about/algorithms/dapo.md new file mode 100644 index 0000000000..2a3b4a1aec --- /dev/null +++ b/docs/about/algorithms/dapo.md @@ -0,0 +1,84 @@ +# DAPO + +[Dual-Clip Asymmetric Policy Optimization (DAPO)](https://arxiv.org/pdf/2503.14476) extends GRPO by allowing asymmetric clipping with distinct minimum and maximum clip parameters. This provides more fine-grained control over policy updates. + +DAPO is implemented through the same `ClippedPGLossFn` as GRPO, but with the ability to set different values for `ratio_clip_min` and `ratio_clip_max`. For standard GRPO/PPO, these parameters are set to the same value. 
+ +## Key Differences from GRPO + +- **Asymmetric Clipping**: DAPO allows `ratio_clip_min` ≠ `ratio_clip_max`, providing asymmetric bounds on the probability ratio +- **Same Infrastructure**: Uses the same training infrastructure and configurations as GRPO + +## DAPO Single Node + +To run DAPO on a single GPU, use the GRPO script with asymmetric clip parameters: + +```sh +# Run DAPO with asymmetric clipping +uv run python examples/run_grpo_math.py \ + policy.model_name="Qwen/Qwen2.5-1.5B" \ + grpo.ratio_clip_min=0.15 \ + grpo.ratio_clip_max=0.25 \ + checkpointing.checkpoint_dir="results/dapo_math" \ + logger.wandb_enabled=True \ + logger.wandb.name="dapo-math" +``` + +For multi-GPU setups: + +```sh +uv run python examples/run_grpo_math.py \ + cluster.gpus_per_node=8 \ + grpo.ratio_clip_min=0.15 \ + grpo.ratio_clip_max=0.25 \ + checkpointing.checkpoint_dir="results/dapo_8gpu" \ + logger.wandb_enabled=True \ + logger.wandb.name="dapo-8gpu" +``` + +## DAPO Multi-node + +DAPO can be run on multiple nodes using the same approach as GRPO: + +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_grpo_math.py \ + --config examples/configs/grpo_math_8B.yaml \ + cluster.num_nodes=2 \ + grpo.ratio_clip_min=0.15 \ + grpo.ratio_clip_max=0.25 \ + checkpointing.checkpoint_dir='results/dapo_2nodes' \ + logger.wandb_enabled=True \ + logger.wandb.name='dapo-multinode'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + +## Configuration + +DAPO uses the same configuration structure as GRPO. The key parameters are: + +```yaml +grpo: + ratio_clip_min: 0.15 # Minimum clip value (can be different from max) + ratio_clip_max: 0.25 # Maximum clip value (can be different from min) + # ... other GRPO parameters ... 
+``` + +For more details on other configuration options, refer to the [GRPO documentation](grpo.md). + +## Additional Resources + +- [DAPO Paper](https://arxiv.org/pdf/2503.14476) +- [GRPO Documentation](grpo.md) +- [Training Backends](../../design-docs/training-backends.md) diff --git a/docs/about/algorithms/dpo.md b/docs/about/algorithms/dpo.md new file mode 100644 index 0000000000..474170ceeb --- /dev/null +++ b/docs/about/algorithms/dpo.md @@ -0,0 +1,58 @@ +# DPO + +We provide a sample DPO experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training. + +## DPO Single Node + +The default DPO experiment is configured to run on a single GPU. To launch the experiment: + +```sh +uv run python examples/run_dpo.py +``` + +This trains `Llama3.2-1B-Instruct` on 1 GPU. + +If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration and switch to an 8B Llama3.1 Instruct model: + +```sh +uv run python examples/run_dpo.py \ + policy.model_name="meta-llama/Llama-3.1-8B-Instruct" \ + policy.train_global_batch_size=256 \ + cluster.gpus_per_node=8 +``` + +Any of the DPO parameters can be customized from the command line. For example: + +```sh +uv run python examples/run_dpo.py \ + dpo.sft_loss_weight=0.1 \ + dpo.preference_average_log_probs=True \ + checkpointing.checkpoint_dir="results/llama_dpo_sft" \ + logger.wandb_enabled=True \ + logger.wandb.name="llama-dpo-sft" +``` + +Refer to `examples/configs/dpo.yaml` for a full list of parameters that can be overridden. For an in-depth explanation of how to add your own DPO dataset, refer to the [DPO documentation](../../guides/dpo.md). 
+ +## DPO Multi-node + +For distributed DPO training across multiple nodes, modify the following script for your use case: + +```sh +# Run from the root of NeMo RL repo +## number of nodes to use for your job +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_dpo.py --config examples/configs/dpo.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 dpo.val_global_batch_size=32 checkpointing.checkpoint_dir='results/dpo_llama81_2nodes' logger.wandb_enabled=True logger.wandb.name='dpo-llama1b'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + diff --git a/docs/about/algorithms/grpo.md b/docs/about/algorithms/grpo.md new file mode 100644 index 0000000000..ad79b944ac --- /dev/null +++ b/docs/about/algorithms/grpo.md @@ -0,0 +1,102 @@ +# GRPO + +We provide a reference GRPO configuration for math benchmarks using the [OpenInstructMath2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2) dataset. + +You can read about the details of the GRPO implementation [here](../../guides/grpo.md). + +## GRPO Single Node + +To run GRPO on a single GPU for `Qwen/Qwen2.5-1.5B`: + +```sh +# Run the GRPO math example using a 1B parameter model +uv run python examples/run_grpo_math.py +``` + +By default, this uses the configuration in `examples/configs/grpo_math_1B.yaml`. You can customize parameters with command-line overrides. For example, to run on 8 GPUs: + +```sh +# Run the GRPO math example using a 1B parameter model using 8 GPUs +uv run python examples/run_grpo_math.py \ + cluster.gpus_per_node=8 +``` + +You can override any of the parameters listed in the YAML configuration file. 
For example: + +```sh +uv run python examples/run_grpo_math.py \ + policy.model_name="meta-llama/Llama-3.2-1B-Instruct" \ + checkpointing.checkpoint_dir="results/llama1b_math" \ + logger.wandb_enabled=True \ + logger.wandb.name="grpo-llama1b_math" \ + logger.num_val_samples_to_print=10 +``` + +The default configuration uses the DTensor training backend. We also provide a config `examples/configs/grpo_math_1B_megatron.yaml` which is set up to use the Megatron backend out of the box. + +To train using this config on a single GPU: + +```sh +# Run a GRPO math example on 1 GPU using the Megatron backend +uv run python examples/run_grpo_math.py \ + --config examples/configs/grpo_math_1B_megatron.yaml +``` + +For additional details on supported backends and how to configure the training backend to suit your setup, refer to the [Training Backends documentation](../../design-docs/training-backends.md). + +## GRPO Multi-node + +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=2 + +# grpo_math_8b uses Llama-3.1-8B-Instruct model +COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml cluster.num_nodes=2 checkpointing.checkpoint_dir='results/llama8b_2nodes' logger.wandb_enabled=True logger.wandb.name='grpo-llama8b_math'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + +The required `CONTAINER` can be built by following the instructions in the [Docker documentation](../../docker.md). + +## GRPO Qwen2.5-32B + +This section outlines how to run GRPO for Qwen2.5-32B with a 16k sequence length. 
+ +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=32 + +# Download Qwen before the job starts to avoid spending time downloading during the training loop +HF_HOME=/path/to/hf_home huggingface-cli download Qwen/Qwen2.5-32B + +# Ensure HF_HOME is included in your MOUNTS +HF_HOME=/path/to/hf_home \ +COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + +## GRPO Multi-Turn + +We also support multi-turn generation and training (tool use, games, etc.). Reference example for training to play a Sliding Puzzle Game: + +```sh +uv run python examples/run_grpo_sliding_puzzle.py +``` + diff --git a/docs/about/algorithms/index.md b/docs/about/algorithms/index.md new file mode 100644 index 0000000000..9f4bec628b --- /dev/null +++ b/docs/about/algorithms/index.md @@ -0,0 +1,27 @@ +# Algorithms + +NeMo RL supports multiple training algorithms for post-training large language models. 
+
+## Support Matrix
+
+| Algorithms | Single Node | Multi-node |
+|------------|-------------|------------|
+| [GRPO](grpo.md) | [GRPO Single Node](grpo.md#grpo-single-node) | [GRPO Multi-node](grpo.md#grpo-multi-node): [GRPO Qwen2.5-32B](grpo.md#grpo-qwen25-32b), [GRPO Multi-Turn](grpo.md#grpo-multi-turn) |
+| [DAPO](dapo.md) | [DAPO Single Node](dapo.md#dapo-single-node) | [DAPO Multi-node](dapo.md#dapo-multi-node) |
+| [On-policy Distillation](on-policy-distillation.md) | [Distillation Single Node](on-policy-distillation.md#on-policy-distillation-single-node) | [Distillation Multi-node](on-policy-distillation.md#on-policy-distillation-multi-node) |
+| [Supervised Fine-Tuning (SFT)](sft.md) | [SFT Single Node](sft.md#sft-single-node) | [SFT Multi-node](sft.md#sft-multi-node) |
+| [DPO](dpo.md) | [DPO Single Node](dpo.md#dpo-single-node) | [DPO Multi-node](dpo.md#dpo-multi-node) |
+| [RM](rm.md) | [RM Single Node](rm.md#rm-single-node) | [RM Multi-node](rm.md#rm-multi-node) |
+On-policy distillation is also supported in the PyTorch DTensor path.
+
+```{toctree}
+:maxdepth: 2
+:hidden:
+
+grpo
+dapo
+on-policy-distillation
+sft
+dpo
+rm
+``` diff --git a/docs/about/algorithms/on-policy-distillation.md new file mode 100644 index 0000000000..7c44945de9 --- /dev/null +++ b/docs/about/algorithms/on-policy-distillation.md @@ -0,0 +1,43 @@ +# On-policy Distillation
+
+We provide an example on-policy distillation experiment using the [DeepScaler dataset](https://huggingface.co/agentica-org/DeepScaleR-1.5B-Preview).
+
+> [!NOTE]
+> Distillation currently supports the DTensor training backend and the vLLM generation backend. Megatron generation/training paths are not supported yet. 
+ +## On-policy Distillation Single Node + +To run on-policy distillation on a single GPU using `Qwen/Qwen3-1.7B-Base` as the student and `Qwen/Qwen3-4B` as the teacher: + +```sh +uv run python examples/run_distillation_math.py +``` + +Customize parameters with command-line overrides. For example: + +```sh +uv run python examples/run_distillation_math.py \ + policy.model_name="Qwen/Qwen3-1.7B-Base" \ + teacher.model_name="Qwen/Qwen3-4B" \ + cluster.gpus_per_node=8 +``` + +## On-policy Distillation Multi-node + +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_distillation_math.py --config examples/configs/distillation_math.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/distill_2nodes' logger.wandb_enabled=True logger.wandb.name='distill-2nodes'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + diff --git a/docs/about/algorithms/rm.md b/docs/about/algorithms/rm.md new file mode 100644 index 0000000000..741651567b --- /dev/null +++ b/docs/about/algorithms/rm.md @@ -0,0 +1,44 @@ +# RM + +We provide a sample RM experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training. + +## RM Single Node + +The default RM experiment is configured to run on a single GPU. To launch the experiment: + +```sh +uv run python examples/run_rm.py +``` + +This trains a RM based on `meta-llama/Llama-3.2-1B-Instruct` on 1 GPU. + +If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration: + +```sh +uv run python examples/run_rm.py cluster.gpus_per_node=8 +``` + +Refer to the [RM documentation](../../guides/rm.md) for more information. 
+
+## RM Multi-node
+
+For distributed RM training across multiple nodes, modify the following script for your use case:
+
+```sh
+# Run from the root of NeMo RL repo
+## number of nodes to use for your job
+NUM_ACTOR_NODES=2
+
+COMMAND="uv run ./examples/run_rm.py --config examples/configs/rm.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/rm_llama1b_2nodes' logger.wandb_enabled=True logger.wandb.name='rm-llama1b-2nodes'" \
+CONTAINER=YOUR_CONTAINER \
+MOUNTS="$PWD:$PWD" \
+sbatch \
+    --nodes=${NUM_ACTOR_NODES} \
+    --account=YOUR_ACCOUNT \
+    --job-name=YOUR_JOBNAME \
+    --partition=YOUR_PARTITION \
+    --time=4:0:0 \
+    --gres=gpu:8 \
+    ray.sub
+```
+ diff --git a/docs/about/algorithms/sft.md new file mode 100644 index 0000000000..c36824deaa --- /dev/null +++ b/docs/about/algorithms/sft.md @@ -0,0 +1,45 @@ +# Supervised Fine-Tuning (SFT)
+
+We provide example SFT experiments using various datasets including [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/), OpenAI format datasets (with tool calling support), and custom JSONL datasets. For detailed documentation on supported datasets and configurations, see the [SFT documentation](../../guides/sft.md).
+
+## SFT Single Node
+
+The default SFT configuration is set to run on a single GPU. To start the experiment:
+
+```sh
+uv run python examples/run_sft.py
+```
+
+This fine-tunes the `Llama3.2-1B` model on the SQuAD dataset using 1 GPU.
+
+To use multiple GPUs on a single node, you can modify the cluster configuration. This adjustment will also let you potentially increase the model and batch size:
+
+```sh
+uv run python examples/run_sft.py \
+    policy.model_name="meta-llama/Meta-Llama-3-8B" \
+    policy.train_global_batch_size=128 \
+    sft.val_global_batch_size=128 \
+    cluster.gpus_per_node=8
+```
+
+Refer to `examples/configs/sft.yaml` for a full list of parameters that can be overridden. 
+ +## SFT Multi-node + +```sh +# Run from the root of NeMo RL repo +NUM_ACTOR_NODES=2 + +COMMAND="uv run ./examples/run_sft.py --config examples/configs/sft.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 checkpointing.checkpoint_dir='results/sft_llama8b_2nodes' logger.wandb_enabled=True logger.wandb.name='sft-llama8b'" \ +CONTAINER=YOUR_CONTAINER \ +MOUNTS="$PWD:$PWD" \ +sbatch \ + --nodes=${NUM_ACTOR_NODES} \ + --account=YOUR_ACCOUNT \ + --job-name=YOUR_JOBNAME \ + --partition=YOUR_PARTITION \ + --time=4:0:0 \ + --gres=gpu:8 \ + ray.sub +``` + diff --git a/docs/about/backends.md b/docs/about/backends.md new file mode 100644 index 0000000000..34fb8e3e95 --- /dev/null +++ b/docs/about/backends.md @@ -0,0 +1,20 @@ +# Training and Generation Backends + +## Training Backends + +NeMo RL supports multiple training backends to accommodate different model sizes and hardware configurations: + +- **PyTorch** - This leverages [NeMo AutoModel](https://github.com/NVIDIA-NeMo/Automodel) to provide accelerated PyTorch training with improved memory efficiency (PyTorch-native TP, SP, PP, CP, and FSDP2) +- [**Megatron**](https://github.com/NVIDIA-NeMo/Megatron-Bridge) - NVIDIA's high-performance training framework for scaling to large models with 6D parallelisms + +The training backend is automatically determined based on your YAML configuration settings. For detailed information on backend selection, configuration, and examples, see the [Training Backends documentation](../design-docs/training-backends.md). 
+ +## Generation Backends + +NeMo RL supports multiple generation/rollout backends to accommodate different model sizes and hardware configurations: + +- [**vLLM**](https://github.com/vllm-project/vllm) - A high-throughput and memory-efficient popular inference and serving engine +- [**Megatron**](https://github.com/NVIDIA/Megatron-LM/tree/main/megatron/core/inference) - A high-performance Megatron-native inference backend which eliminates weight conversion between training and inference + +For detailed information on backend selection, configuration, and examples, see the [Generation Backends documentation](../design-docs/generation.md). + diff --git a/docs/about/clusters.md b/docs/about/clusters.md new file mode 100644 index 0000000000..cfb6041d87 --- /dev/null +++ b/docs/about/clusters.md @@ -0,0 +1,4 @@ +# Installation: Set Up Clusters + +For detailed instructions on how to set up and launch NeMo RL on Slurm or Kubernetes clusters, please refer to the dedicated [Cluster Start](../cluster.md) documentation. + diff --git a/docs/about/evaluation.md b/docs/about/evaluation.md new file mode 100644 index 0000000000..9bd992a46b --- /dev/null +++ b/docs/about/evaluation.md @@ -0,0 +1,60 @@ +# Evaluation + +We provide evaluation tools to assess model capabilities. + +## Convert Model Format (Optional) + +If you have trained a model and saved the checkpoint in the PyTorch DCP format, you first need to convert it to the Hugging Face format before running evaluation: + +```sh +# Example for a GRPO checkpoint at step 170 +uv run python examples/converters/convert_dcp_to_hf.py \ + --config results/grpo/step_170/config.yaml \ + --dcp-ckpt-path results/grpo/step_170/policy/weights/ \ + --hf-ckpt-path results/grpo/hf +``` + +If you have a model saved in Megatron format, you can use the following command to convert it to Hugging Face format prior to running evaluation. 
This script requires Megatron Core, so make sure you launch with the mcore extra: + +```sh +# Example for a GRPO checkpoint at step 170 +uv run --extra mcore python examples/converters/convert_megatron_to_hf.py \ + --config results/grpo/step_170/config.yaml \ + --megatron-ckpt-path results/grpo/step_170/policy/weights/iter_0000000 \ + --hf-ckpt-path results/grpo/hf +``` + +> [!NOTE] +> Adjust the paths according to your training output directory structure. + +For an in-depth explanation of checkpointing, refer to the [Checkpointing documentation](../design-docs/checkpointing.md). + +## Run Evaluation + +Run the evaluation script with the converted model: + +```sh +uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf +``` + +Run the evaluation script with custom settings: + +```sh +# Example: Evaluation of DeepScaleR-1.5B-Preview on MATH-500 using 8 GPUs +# Pass@1 accuracy averaged over 16 samples for each problem +uv run python examples/run_eval.py \ + --config examples/configs/evals/math_eval.yaml \ + generation.model_name=agentica-org/DeepScaleR-1.5B-Preview \ + generation.temperature=0.6 \ + generation.top_p=0.95 \ + generation.vllm_cfg.max_model_len=32768 \ + data.dataset_name=math500 \ + eval.num_tests_per_prompt=16 \ + cluster.gpus_per_node=8 +``` + +> [!NOTE] +> Evaluation results may vary slightly due to various factors, such as sampling parameters, random seed, inference engine version, and inference engine settings. + +Refer to `examples/configs/evals/eval.yaml` for a full list of parameters that can be overridden. For an in-depth explanation of evaluation, refer to the [Evaluation documentation](../guides/eval.md). 
+ diff --git a/docs/about/features.md b/docs/about/features.md new file mode 100644 index 0000000000..147b9b7aee --- /dev/null +++ b/docs/about/features.md @@ -0,0 +1,32 @@ +# Features and Roadmap + +_Available now_ | _Coming in v0.4_ + +## Coming in v0.4 + +- **Megatron Inference** - Megatron Inference for fast Day-0 support for new Megatron models (avoid weight conversion) +- **Async RL** - Support for asynchronous rollouts and replay buffers for off-policy training, and enable a fully asynchronous GRPO +- **Vision Language Models (VLM)** - Support SFT and GRPO on VLMs through the DTensor path +- **Improved Native Performance** - Improve training time for native PyTorch models +- **Improved Large MoE Performance** - Improve Megatron Core training performance and generation performance +- **End-to-End FP8 Low-Precision Training** - Support for Megatron Core FP8 training and FP8 vLLM generation +- **Megatron Bridge Integration** - Integrate Megatron Bridge to enable training features from Megatron Core +- **NeMo Automodel Integration** - Integrate NeMo Automodel to power the DTensor path +- **New Models** - `gpt-oss` +- **Expand Algorithms** - DAPO, GSPO, On-policy Distillation +- **GB200** - Add container support for GB200 + +## Available Now + +- **Distributed Training** - Ray-based infrastructure +- **Environment Support and Isolation** - Support for multi-environment training and dependency isolation between components +- **Worker Isolation** - Process isolation between RL Actors (no worries about global state) +- **Learning Algorithms** - GRPO/GSPO, SFT, and DPO +- **Multi-Turn RL** - Multi-turn generation and training for RL with tool use, games, etc +- **Advanced Parallelism with DTensor** - PyTorch FSDP2, TP, CP, and SP for efficient training +- **Larger Model Support with Longer Sequences** - Performant parallelisms with Megatron Core (TP/PP/CP/SP/EP/FSDP) +- **MoE Models** - Support for DeepSeekV3 and Qwen-3 MoE models (Megatron) +- **Sequence Packing** - 
Sequence packing in both DTensor and Megatron Core for huge training performance gains
+- **Fast Generation** - vLLM backend for optimized inference
+- **Hugging Face Integration** - Works with 1B–70B models (Qwen, Llama)
+ diff --git a/docs/about/installation.md new file mode 100644 index 0000000000..4b7c9ba89b --- /dev/null +++ b/docs/about/installation.md @@ -0,0 +1,92 @@ +# Installation and Prerequisites
+
+## Clone the Repository
+
+Clone **NeMo RL** with submodules:
+
+```sh
+git clone git@github.com:NVIDIA-NeMo/RL.git nemo-rl --recursive
+cd nemo-rl
+
+# If you have already cloned without the recursive option, you can initialize the submodules recursively
+git submodule update --init --recursive
+
+# Different branches of the repo can have different pinned versions of these third-party submodules. Ensure
+# submodules are automatically updated after switching branches or pulling updates by configuring git with:
+# git config submodule.recurse true
+
+# **NOTE**: this setting will not download **new** or remove **old** submodules with the branch's changes.
+# You will have to run the full `git submodule update --init --recursive` command in these situations.
+```
+
+## Install System Dependencies
+
+### cuDNN (For Megatron Backend)
+
+If you are using the Megatron backend on bare metal (outside of a container), you may need to install the cuDNN headers. 
Here is how you check and install them: + +```sh +# Check if you have libcudnn installed +dpkg -l | grep cudnn.*cuda + +# Find the version you need here: https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network +# As an example, these are the "Linux Ubuntu 20.04 x86_64" instructions +wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb +sudo dpkg -i cuda-keyring_1.1-1_all.deb +sudo apt update +sudo apt install cudnn # Will install cuDNN meta packages which points to the latest versions +# sudo apt install cudnn9-cuda-12 # Will install cuDNN version 9.x.x compiled for cuda 12.x +# sudo apt install cudnn9-cuda-12-8 # Will install cuDNN version 9.x.x compiled for cuda 12.8 +``` + +### libibverbs (For vLLM Dependencies) + +If you encounter problems when installing vllm's dependency `deepspeed` on bare-metal (outside of a container), you may need to install `libibverbs-dev`: + +```sh +sudo apt-get update +sudo apt-get install libibverbs-dev +``` + +## Install UV Package Manager + +For faster setup and environment isolation, we use [uv](https://docs.astral.sh/uv/). + +Follow [these instructions](https://docs.astral.sh/uv/getting-started/installation/) to install uv. + +Quick install: +```sh +curl -LsSf https://astral.sh/uv/install.sh | sh +``` + +## Create Virtual Environment + +Initialize the NeMo RL project virtual environment: + +```sh +uv venv +``` + +> [!NOTE] +> Please do not use `-p/--python` and instead allow `uv venv` to read it from `.python-version`. +> This ensures that the version of python used is always what we prescribe. + +## Using UV to Run Commands + +Use `uv run` to launch all commands. It handles pip installing implicitly and ensures your environment is up to date with our lock file. 
+ +```sh +# Example: Run GRPO with DTensor backend +uv run python examples/run_grpo_math.py + +# Example: Run GRPO with Megatron backend +uv run python examples/run_grpo_math.py --config examples/configs/grpo_math_1B_megatron.yaml +``` + +> [!NOTE] +> - It is not recommended to activate the `venv`, and you should use `uv run <command>` instead to execute scripts within the managed environment. +> This ensures consistent environment usage across different shells and sessions. +> - Ensure your system has the appropriate CUDA drivers installed, and that your PyTorch version is compatible with both your CUDA setup and hardware. +> - If you update your environment in `pyproject.toml`, it is necessary to force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run. +> - **Reminder**: Don't forget to set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). You'll need to do a `huggingface-cli login` as well for Llama models. + diff --git a/docs/about/overview.md b/docs/about/overview.md new file mode 100644 index 0000000000..b0eda113cc --- /dev/null +++ b/docs/about/overview.md @@ -0,0 +1,18 @@ +# Overview + +**NeMo RL** is an open-source post-training library within the [NeMo Framework](https://github.com/NVIDIA-NeMo), designed to streamline and scale reinforcement learning methods for multimodal models (LLMs, VLMs, etc.). Designed for flexibility, reproducibility, and scale, NeMo RL enables both small-scale experiments and massive multi-GPU, multi-node deployments for fast experimentation in research and production environments. + +## What You Can Expect + +- **Flexibility** with a modular design that allows easy integration and customization. +- **Efficient resource management using Ray**, enabling scalable and flexible deployment across different hardware configurations. +- **Hackable** with native PyTorch-only paths for quick research prototypes. 
+- **High performance with Megatron Core**, supporting various parallelism techniques for large models and large context lengths. +- **Seamless integration with Hugging Face** for ease of use, allowing users to leverage a wide range of pre-trained models and tools. +- **Comprehensive documentation** that is both detailed and user-friendly, with practical examples. + +For more details on the architecture and design philosophy, see the [design documents](../design-docs/design-and-philosophy.md). + +## Releases + +For a complete list of releases and detailed changelogs, visit the [GitHub Releases page](https://github.com/NVIDIA-NeMo/RL/releases). diff --git a/docs/about/quick-start.md b/docs/about/quick-start.md new file mode 100644 index 0000000000..2cc0849006 --- /dev/null +++ b/docs/about/quick-start.md @@ -0,0 +1,40 @@ +# Quick Start + +Use this quick start to get going with either the native PyTorch DTensor or Megatron Core training backends. + +> [!NOTE] +> Both training backends are independent — you can install and use either one on its own. + +For more examples and setup details, continue to the [Prerequisites](installation.md) section. + +## Quick Start Options + +| Native PyTorch (DTensor) | Megatron Core | +|--------------------------|---------------| +| **Clone and create the environment** | | + +```sh +git clone git@github.com:NVIDIA-NeMo/RL.git nemo-rl +cd nemo-rl +git submodule update --init --recursive +uv venv +``` + +> [!NOTE] +> If you previously ran without checking out the submodules, you may need to rebuild virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true`. See [Tips and Tricks](tips-and-tricks.md). 
+ +| Native PyTorch (DTensor) | Megatron Core | +|--------------------------|---------------| +| **Run GRPO (DTensor)** | **Run GRPO (Megatron)** | + +```sh +# DTensor +uv run python examples/run_grpo_math.py +``` + +```sh +# Megatron +uv run examples/run_grpo_math.py \ + --config examples/configs/grpo_math_1B_megatron.yaml +``` + diff --git a/docs/about/tips-and-tricks.md b/docs/about/tips-and-tricks.md new file mode 100644 index 0000000000..60f91a6e0d --- /dev/null +++ b/docs/about/tips-and-tricks.md @@ -0,0 +1,43 @@ +# Tips and Tricks + +## Missing Submodules Error + +If you forget to initialize the NeMo and Megatron submodules when cloning the NeMo-RL repository, you may run into an error like this: + +```sh +ModuleNotFoundError: No module named 'megatron' +``` + +If you see this error, there is likely an issue with your virtual environments. To fix this, first initialize the submodules: + +```sh +git submodule update --init --recursive +``` + +and then force a rebuild of the virtual environments by setting `NRL_FORCE_REBUILD_VENVS=true` next time you launch a run: + +```sh +NRL_FORCE_REBUILD_VENVS=true uv run examples/run_grpo.py ... +``` + +## Memory Fragmentation + +Large amounts of memory fragmentation might occur when running models without support for FlashAttention2. If OOM occurs after a few iterations of training, it may help to tweak the allocator settings to reduce memory fragmentation. To do so, specify [`max_split_size_mb`](https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf) at **either** one of the following places: + +1. Launch training with: + +```sh +# This will globally apply to all Ray actors +PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:64 uv run python examples/run_dpo.py ... +``` + +2. Make the change more permanently by adding this flag in the training configuration: + +```yaml +policy: + # ... 
+ dtensor_cfg: + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "max_split_size_mb:64" +``` + diff --git a/docs/adding-new-models.md b/docs/adding-new-models.md index e0de97ae40..c9951acdc7 100644 --- a/docs/adding-new-models.md +++ b/docs/adding-new-models.md @@ -152,3 +152,163 @@ uv run --extra vllm tools/model_diagnostics/2.long_generation_decode_vs_prefill. # ... # [Qwen/Qwen2.5-1.5B] ALL GOOD! ``` + +## [3.check_hf_model_embeddings_untrained.py](https://github.com/NVIDIA-NeMo/RL/blob/main/tools/model_diagnostics/3.check_hf_model_embeddings_untrained.py) + +Detects untrained or improperly initialized Hugging Face model embeddings by scanning for near-zero rows and rows with near-identical values in both input and output embeddings. The script also reports whether word embeddings are tied and summarizes basic statistics. + +```sh +# Example run +uv run --extra mcore tools/model_diagnostics/3.check_hf_model_embeddings_untrained.py --model nvidia/Nemotron-H-8B-Base-8K + +# .... +#================================================================================ +#EMBEDDING SUMMARIES +#================================================================================ +# +#--- Input Embeddings Summary --- +#Shape: torch.Size([131072, 4096]), Dtype: torch.bfloat16 +#Near-zero embeddings (abs < 1.00e-10): 1039/131072 (0.8%) +# Indices: 0-1,3-999,1192-1193,1245-1255,55014,77579,81772,81819,82312,82500,82725,82737,82977,84020,84121,84521,84794,85015,86409,87411,89412,90320,91368,94485,96385,104097,108262,112147,112327,112497,114755 +#Identical embeddings (std < 1.00e-08): 1041/131072 (0.8%) +# Indices: 0-1,3-999,1192-1193,1245-1255,55014,77579,81772,81819,82312,82500,82725,82737,82977,83855,84020,84121,84521,84794,85015,86409,87411,89412,90320,91368,94485,96385,101707,104097,108262,112147,112327,112497,114755 +#Statistics: mean_abs=0.007874, max_abs=0.196289, std_range=[0.000000, 0.015442] +#⚠️ POTENTIAL ISSUES: 1039 near-zero embeddings, 1041 identical embeddings +# +#--- Output 
Embeddings Summary (Tied: False) ---
+#Shape: torch.Size([131072, 4096]), Dtype: torch.bfloat16
+#Near-zero embeddings (abs < 1.00e-10): 0/131072 (0.0%)
+#Identical embeddings (std < 1.00e-08): 0/131072 (0.0%)
+#Statistics: mean_abs=0.006775, max_abs=0.200195, std_range=[0.004089, 0.021240]
+#✅ No obvious untrained patterns detected
+#
+#=== Final Summary ===
+#Model: nvidia/Nemotron-H-8B-Base-8K
+#Analysis complete.
+```
+
+- Thresholds can be adjusted via flags:
+  - `--near-zero-threshold` (default: `1e-10`)
+  - `--identical-threshold` (default: `1e-8`)
+- If any near-zero or identical rows are reported, the model may have issues of numerical instability (e.g., inf grad norms) during post-training if any of these problematic tokens are encountered. We have observed this happening when special tokens are reserved in the tokenizer and embedding, but none are encountered during pre-training. It may help to initialize these embeddings similarly to how they were initialized during pre-training.
+
+## [4.vllm_precision_compilation_test.py](https://github.com/NVIDIA-NeMo/RL/blob/main/tools/model_diagnostics/4.vllm_precision_compilation_test.py)
+
+Tests vLLM precision compilation by comparing log probabilities across different compilation modes and configurations. This script helps diagnose numerical precision issues that commonly arise when using different vLLM compilation settings. **Note that this is not a strict pass/fail test** - it's designed to help you understand and investigate numerical discrepancies.
+
+```sh
+# Example run
+uv run --extra vllm tools/model_diagnostics/4.vllm_precision_compilation_test.py --model deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+
+# Typical output shows mixed results:
+# Eager and cuda graph mode lps: FAILED - Arrays are different
+...
+# Eager and cuda graph mode lps with torch inductor precision flag: FAILED - Arrays are different
+...
+# Eager and cuda graph mode lps with use_inductor disabled: PASSED - Arrays are close within tolerance (atol=0.001, rtol=0.001) +``` + +See example for model `deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B` +``` +==================================================================================================== +Eager and cuda graph mode lps (prompt lps): FAILED - Arrays are different + Detailed error: +Not equal to tolerance rtol=0.001, atol=0.001 + +Mismatched elements: 96 / 515 (18.6%) +Max absolute difference among violations: 0.3885002 +Max relative difference among violations: 0.20179409 + ACTUAL: array([[-1.424489e+01, -3.924684e-01, -3.135911e+00, -4.258007e-01, + -3.443364e-04, nan, nan, nan, + nan, nan, nan, nan,... + DESIRED: array([[-1.420929e+01, -3.619126e-01, -3.241854e+00, -4.308376e-01, + -3.047717e-04, nan, nan, nan, + nan, nan, nan, nan,... +==================================================================================================== +==================================================================================================== +Eager and cuda graph mode lps (generation lps): FAILED - Arrays are different + Detailed error: +Not equal to tolerance rtol=0.001, atol=0.001 + +nan location mismatch: + ACTUAL: array([[-1.231834e+01, -1.411233e-01, -3.764260e-01, ..., nan, + nan, nan], + [-8.567932e+00, -1.066314e+01, -4.463661e-01, ..., nan,... + DESIRED: array([[-1.226752e+01, -1.508305e-01, -4.024158e-01, ..., nan, + nan, nan], + [-8.610202e+00, -1.067061e+01, -4.593382e-01, ..., -1.060957e-05,... +==================================================================================================== +... 
+==================================================================================================== +Eager and cuda graph mode lps with torch inductor precision flag (prompt lps): FAILED - Arrays are different + Detailed error: +Not equal to tolerance rtol=0.001, atol=0.001 + +Mismatched elements: 96 / 515 (18.6%) +Max absolute difference among violations: 0.3885002 +Max relative difference among violations: 0.20179409 + ACTUAL: array([[-1.424489e+01, -3.924684e-01, -3.135911e+00, -4.258007e-01, + -3.443364e-04, nan, nan, nan, + nan, nan, nan, nan,... + DESIRED: array([[-1.420929e+01, -3.619126e-01, -3.241854e+00, -4.308376e-01, + -3.047717e-04, nan, nan, nan, + nan, nan, nan, nan,... +==================================================================================================== +==================================================================================================== +Eager and cuda graph mode lps with torch inductor precision flag (generation lps): FAILED - Arrays are different + Detailed error: +Not equal to tolerance rtol=0.001, atol=0.001 + +nan location mismatch: + ACTUAL: array([[-1.231834e+01, -1.411233e-01, -3.764260e-01, ..., nan, + nan, nan], + [-8.567932e+00, -1.066314e+01, -4.463661e-01, ..., nan,... + DESIRED: array([[-1.226752e+01, -1.508305e-01, -4.024158e-01, ..., nan, + nan, nan], + [-8.610202e+00, -1.067061e+01, -4.593382e-01, ..., -1.060957e-05,... +==================================================================================================== +... +Eager and cuda graph mode lps with use_inductor disabled (prompt lps): PASSED - Arrays are close within tolerance (atol=0.001, rtol=0.001) +Eager and cuda graph mode lps with use_inductor disabled (generation lps): PASSED - Arrays are close within tolerance (atol=0.001, rtol=0.001) +``` + +**What this script tests:** + +The script is to compare both prompt and generation logprobs under the following setups: + +1. 
**Eager vs CUDA Graph Mode**: Compares log probabilities between eager execution (ground truth) and CUDA graph compilation mode + - **⚠️ Commonly fails**: This comparison often shows discrepancies due to compilation optimizations +2. **Torch Inductor Precision**: Tests with `TORCHINDUCTOR_EMULATE_PRECISION_CASTS=1` environment variable + - **⚠️ May help**: This flag may help but typically doesn't resolve all the numerical differences +3. **Inductor Disabled**: Verifies that disabling Torch Inductor compilation (`use_inductor=False`) maintains output consistency + - **✅ Usually works well**: This configuration often produces results very close to eager mode + - **Note**: `use_inductor=False` disables Inductor compilation but keeps CUDA graph capture active for compatible operations + +**Performance vs Accuracy Trade-offs:** + +The different compilation modes offer distinct trade-offs between accuracy and performance: + +- **Eager Mode** (`enforce_eager=True`): Highest accuracy (ground truth) but slowest execution +- **CUDA Graph Mode with Inductor Disabled** (`enforce_eager=False` and `compilation_config={"use_inductor": False}`): Near-eager accuracy with significant speedup from CUDA graph optimization +- **CUDA Graph Mode with Inductor Enabled** (`enforce_eager=False` and `compilation_config={"use_inductor": True}`): Potentially fastest execution with custom Triton kernels (since Triton is the current backend of Inductor), but may introduce numerical differences. For accuracy improvement, try the torch inductor precision flag: `export TORCHINDUCTOR_EMULATE_PRECISION_CASTS=1` + +**Note**: Performance characteristics vary by model. For example, `deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B` shows similar speed performance between `use_inductor=True` and `use_inductor=False`, making the accuracy-preserving option preferable. 
+ +**Why this matters:** + +- **Debugging**: Helps identify which compilation settings cause numerical differences +- **Configuration**: Shows which settings work best for your model +- **Understanding**: Reveals how compilation affects model outputs + +**When to use:** + +- **Model integration** - understand numerical behavior across vLLM configurations +- **Debugging** - investigate differences between development and production +- **Research** - study compilation strategy impacts on precision + +**Interpreting results:** + +- **Eager vs CUDA Graph failures are normal** - don't panic if this fails +- **Focus on patterns** - some models are more sensitive than others +- **Use as guidance** - helps choose reliable compilation settings +- **Balance precision vs performance** - choose what works for your use case \ No newline at end of file diff --git a/docs/assets/RL_diagram.png b/docs/assets/RL_diagram.png new file mode 100644 index 0000000000..7a47b5fa06 Binary files /dev/null and b/docs/assets/RL_diagram.png differ diff --git a/docs/assets/dapo_train_reward.png b/docs/assets/dapo_train_reward.png new file mode 100644 index 0000000000..efe8dda10b Binary files /dev/null and b/docs/assets/dapo_train_reward.png differ diff --git a/docs/assets/dapo_val_acc.png b/docs/assets/dapo_val_acc.png new file mode 100644 index 0000000000..8b1c5ddba9 Binary files /dev/null and b/docs/assets/dapo_val_acc.png differ diff --git a/docs/assets/fp8_curves.png b/docs/assets/fp8_curves.png new file mode 100644 index 0000000000..1825877a9e Binary files /dev/null and b/docs/assets/fp8_curves.png differ diff --git a/docs/assets/fp8_e2e_curve.png b/docs/assets/fp8_e2e_curve.png new file mode 100644 index 0000000000..d479602102 Binary files /dev/null and b/docs/assets/fp8_e2e_curve.png differ diff --git a/docs/assets/nsys-multi-report-view.png b/docs/assets/nsys-multi-report-view.png new file mode 100644 index 0000000000..4eac23c40b Binary files /dev/null and 
b/docs/assets/nsys-multi-report-view.png differ diff --git a/docs/assets/train-reward-sliding-puzzle.png b/docs/assets/train-reward-sliding-puzzle.png new file mode 100644 index 0000000000..82d319f4f2 Binary files /dev/null and b/docs/assets/train-reward-sliding-puzzle.png differ diff --git a/docs/assets/valid_acc-sliding-puzzle.png b/docs/assets/valid_acc-sliding-puzzle.png new file mode 100644 index 0000000000..7b6d539916 Binary files /dev/null and b/docs/assets/valid_acc-sliding-puzzle.png differ diff --git a/docs/cluster.md b/docs/cluster.md index 2327b813c1..73e2225a1b 100644 --- a/docs/cluster.md +++ b/docs/cluster.md @@ -25,9 +25,8 @@ sbatch \ ray.sub ``` -:::{tip} -Depending on your Slurm cluster configuration, you may or may not need to include the `--gres=gpu:8` option in the `sbatch` command. -::: +> [!TIP] +> Depending on your Slurm cluster configuration, you may or may not need to include the `--gres=gpu:8` option in the `sbatch` command. Upon successful submission, Slurm will print the `SLURM_JOB_ID`: ```text @@ -40,9 +39,8 @@ tail -f 1980204-logs/ray-driver.log ### Interactive Launching -:::{tip} -A key advantage of running interactively on the head node is the ability to execute multiple multi-node jobs without needing to requeue in the Slurm job queue. This means that during debugging sessions, you can avoid submitting a new `sbatch` command each time. Instead, you can debug and re-submit your NeMo RL job directly from the interactive session. -::: +> [!TIP] +> A key advantage of running interactively on the head node is the ability to execute multiple multi-node jobs without needing to requeue in the Slurm job queue. This means that during debugging sessions, you can avoid submitting a new `sbatch` command each time. Instead, you can debug and re-submit your NeMo RL job directly from the interactive session. 
To run interactively, launch the same command as [Batched Job Submission](#batched-job-submission), but omit the `COMMAND` line: ```sh @@ -111,14 +109,13 @@ sbatch ray.sub \ - Sets the cache dir for downloaded Huggingface datasets. `````` -:::{tip} -When `HF_TOKEN`, `WANDB_API_KEY`, `HF_HOME`, and `HF_DATASETS_CACHE` are set in your shell environment using `export`, they are automatically passed to `ray.sub`. For instance, if you set: - -```sh -export HF_TOKEN=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX -``` -this token will be available to your NeMo RL run. Consider adding these exports to your shell configuration file, such as `~/.bashrc`. -::: +> [!TIP] +> When `HF_TOKEN`, `WANDB_API_KEY`, `HF_HOME`, and `HF_DATASETS_CACHE` are set in your shell environment using `export`, they are automatically passed to `ray.sub`. For instance, if you set: +> +> ```sh +> export HF_TOKEN=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX +> ``` +> this token will be available to your NeMo RL run. Consider adding these exports to your shell configuration file, such as `~/.bashrc`. #### Advanced Environment Configuration ``````{list-table} @@ -170,10 +167,9 @@ this token will be available to your NeMo RL run. Consider adding these exports - Maximum port in the range for Ray worker processes. `````` -:::{note} -For the most part, you will not need to change ports unless these -are already taken by some other service backgrounded on your cluster. -::: +> [!NOTE] +> For the most part, you will not need to change ports unless these +> are already taken by some other service backgrounded on your cluster. 
## Kubernetes diff --git a/docs/conf.py b/docs/conf.py index 60bcecf32f..4e4b34630d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,6 +22,15 @@ import os import sys +import urllib.parse +from pathlib import Path +from typing import Any + +import git +from docutils import nodes +from docutils.transforms import Transform +from sphinx import addnodes +from sphinx.application import Sphinx project = "NeMo-RL" copyright = "2025, NVIDIA Corporation" @@ -33,15 +42,18 @@ extensions = [ "myst_parser", # For our markdown docs - "autodoc2", # Generates API docs + "autodoc2", # For automatic API documentation generation "sphinx.ext.viewcode", # For adding a link to view source code in docs "sphinx.ext.doctest", # Allows testing in docstrings "sphinx.ext.napoleon", # For google style docstrings - "sphinx_copybutton", # For copy button in code blocks + "sphinx_copybutton", # For copy button in code blocks, + "sphinx_design", # For grid layout + "sphinx.ext.ifconfig", # For conditional content + "sphinxcontrib.mermaid", # For Mermaid diagrams ] templates_path = ["_templates"] -exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", ".venv"] # -- Options for MyST Parser (Markdown) -------------------------------------- # MyST Parser settings @@ -98,3 +110,150 @@ }, } html_extra_path = ["project.json", "versions1.json"] + + +# Github links are now getting rate limited from the Github Actions +linkcheck_ignore = [ + ".*github\\.com.*", + ".*githubusercontent\\.com.*", +] + + +def _convert_gh_admonitions_inplace(contents: list[str]) -> None: + """Mutate contents to convert GitHub blockquote admonitions to MyST. 
+ + https://github.com/executablebooks/MyST-Parser/issues/845 + """ + _github_admonitions = { + "> [!NOTE]": "note", + "> [!TIP]": "tip", + "> [!IMPORTANT]": "important", + "> [!WARNING]": "warning", + "> [!CAUTION]": "caution", + } + # Use 8 backticks for admonition fences to allow code blocks with 3 or 6 backticks inside + FENCE = "````````" + for i, orig_content in enumerate(contents): + orig_line_splits = orig_content.split("\n") + replacing = False + for j, line in enumerate(orig_line_splits): + line_roi = line.lstrip() + for admonition_key, admonition_name in _github_admonitions.items(): + if line_roi.startswith(admonition_key): + replacement = f"{FENCE}{{{admonition_name}}}" + if replacing: + # Close previous fence before starting new admonition + # Add blank line between admonitions for proper MyST parsing + line = ( + f"{FENCE}\n\n{line.replace(admonition_key, replacement, 1)}" + ) + else: + line = line.replace(admonition_key, replacement, 1) + replacing = True + break + else: + if not replacing: + continue + if line_roi.startswith("> "): + line = line.replace("> ", "", 1) + elif line_roi.rstrip() == ">": + line = line.replace(">", "", 1) + else: + line = f"{FENCE}\n{line}" + replacing = False + orig_line_splits[j] = line + if replacing: + orig_line_splits.append(FENCE) + replacing = False + contents[i] = "\n".join(orig_line_splits) + + +def _convert_gh_admonitions( + _app: Sphinx, _relative_path: Path, _parent_docname: str, contents: list[str] +) -> None: + _convert_gh_admonitions_inplace(contents) + + +def _convert_gh_admonitions_source( + _app: Sphinx, _docname: str, source: list[str] +) -> None: + # Sphinx "source-read" event + _convert_gh_admonitions_inplace(source) + + +class _GitHubLinkTransform(Transform): + """Converting the relative path to a file in a Markdown to the URL of that file on GitHub.""" + + default_priority = 500 # type: ignore[bad-override] + + @staticmethod + def _get_github_source_url(repo: git.Repo) -> str: + # Find out which 
remote GitHub repo should be the source. + if "origin" in repo.remotes: + url = repo.remotes.origin.url + elif len(repo.remotes) == 1: + url = repo.remotes[0].url + else: + raise ValueError( + "Cannot determine which remote repo on GitHub this local repo is from." + ) + # Canonicalize the URL. + if url.startswith("git@github.com:"): + url = url.replace("git@github.com:", "https://github.com/", 1) + if url.endswith(".git"): + url = url[: -len(".git")] + return url + + def apply(self, **kwargs: Any) -> None: # type: ignore[bad-override] + try: + local_repo = git.Repo(search_parent_directories=True) + # Hardcode github url for now due to errors when building in a different environment + # remote_repo_url = self._get_github_source_url(local_repo) + remote_repo_url = "https://github.com/NVIDIA-NeMo/RL" + except Exception: + # Cannot figure out which source url it should be; leave links as-is. + return + if local_repo.working_tree_dir is None: + # If the local repo is a bare repo, the method below won't work. + return + wt_dir = local_repo.working_tree_dir + + for node in self.document.traverse(addnodes.download_reference): + md_dir = Path(node["refdoc"]).parent + dst_path = md_dir / Path(node["reftarget"]) + try: + dst_path = dst_path.resolve(strict=True) + except OSError: + # If the path doesn't exist or a symlink loop is encountered. + continue + if dst_path.is_file(): + kind = "blob" + elif dst_path.is_dir(): + kind = "tree" + else: + # Cannot figure out what type of thing this path is pointing to. + continue + refuri = "/".join( + ( + remote_repo_url.rstrip("/"), + kind, + local_repo.head.object.hexsha, + urllib.parse.quote(dst_path.relative_to(wt_dir).as_posix()), + ) + ) + new_node = nodes.reference(rawsource=node.rawsource, refuri=refuri) + # Preserve styling and title if present. 
+ if "classes" in node:
+ new_node["classes"] = list(node["classes"])
+ if "title" in node:
+ new_node["title"] = node["title"]
+ if node.children:
+ new_node += node.children
+ node.replace_self(new_node)
+
+
+def setup(app: Sphinx) -> None:
+ app.add_transform(_GitHubLinkTransform)
+ # Convert GH admonitions for included files and top-level sources
+ app.connect("include-read", _convert_gh_admonitions)
+ app.connect("source-read", _convert_gh_admonitions_source)
diff --git a/docs/debugging.md b/docs/debugging.md
index aa40015d25..cd3b55d354 100644
--- a/docs/debugging.md
+++ b/docs/debugging.md
@@ -68,10 +68,10 @@ Note that you can jump between breakpoints across all workers with this process.

 ![Ray Debugger Extension Step 4](./assets/ray-debug-step4.png)

-## Debug the Driver Script
+## Debug with legacy Ray debugger

-By default, setting breakpoints in the driver script (outside of `@ray.remote`) will not pause program execution when using Ray. To enable pausing at these breakpoints, set the environment variable to `RAY_DEBUG=legacy`:
+To use the legacy Ray debugger, there are two ways:
+1. In general, set `RAY_DEBUG=legacy` and add `--ray-debugger-external` to your `ray start` command.
+2. If you are using `ray.sub` on a Slurm cluster, you can simply set `RAY_DEBUG=legacy` before `sbatch ray.sub`; the script will detect this environment variable and attach `--ray-debugger-external` automatically.

-```sh
-RAY_DEBUG=legacy uv run ....
-```
+After you start Ray with these changes, you can add `breakpoint()` calls to your code. When you run the program, it will stop where breakpoints are inserted. Then you can use a separate terminal to attach to the head node via `bash <JOB_ID>-attach.sh` (this script should automatically be generated by `ray.sub`), and run `ray debug` to see all the breakpoints. You can enter any breakpoint and interactively debug.
Please refer to [Ray documentation](https://docs.ray.io/en/latest/ray-observability/user-guides/debug-apps/ray-debugging.html) for more info on this debugging approach. \ No newline at end of file diff --git a/docs/design-docs/chat-datasets.md b/docs/design-docs/chat-datasets.md index fafd387109..eaad89f3bd 100644 --- a/docs/design-docs/chat-datasets.md +++ b/docs/design-docs/chat-datasets.md @@ -27,7 +27,7 @@ Hugging Face chat datasets are expected to have the following structure: Each ex ## Chat Templates -Formatting the data in this way allows us to take advantage of the Hugging Face tokenizers' `apply_chat_template` functionality to combine the messages. Chat templates can be used to add special tokens or task-specific information to each example in the dataset. Refer to the [HuggingFace apply_chat_template documentation](https://huggingface.co/docs/transformers/main/en/chat_templating#applychattemplate) for details. +Formatting the data in this way allows us to take advantage of the Hugging Face tokenizers' `apply_chat_template` functionality to combine the messages. Chat templates can be used to add special tokens or task-specific information to each example in the dataset. Refer to the [HuggingFace apply_chat_template documentation](https://huggingface.co/docs/transformers/main/en/chat_templating#using-applychattemplate) for details. By default, `apply_chat_template` attempts to apply the `chat_template` associated with the tokenizer. However, in some cases, users might want to specify their own chat template. Also, note that many tokenizers do not have associated `chat_template`s, in which case an explicit chat template is required. Users can specify an explicit chat template string using Jinja format and can pass that string to `apply_chat_template`. 
The following is an example using a simple template which prepends a role header to each turn: @@ -60,4 +60,4 @@ assert output == expected_output :hide: ``` -For more details on creating chat templates, refer to the [Hugging Face documentation](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template). \ No newline at end of file +For more details on creating chat templates, refer to the [Hugging Face documentation](https://huggingface.co/docs/transformers/v4.34.0/en/chat_templating#how-do-i-create-a-chat-template). diff --git a/docs/design-docs/checkpointing.md b/docs/design-docs/checkpointing.md index 5d3feae680..185b6e0358 100644 --- a/docs/design-docs/checkpointing.md +++ b/docs/design-docs/checkpointing.md @@ -1,8 +1,10 @@ -# Checkpointing with Hugging Face Models +# Exporting Checkpoints to Hugging Face Format NeMo RL provides two checkpoint formats for Hugging Face models: Torch distributed and Hugging Face format. Torch distributed is used by default for efficiency, and Hugging Face format is provided for compatibility with Hugging Face's `AutoModel.from_pretrained` API. Note that Hugging Face format checkpoints save only the model weights, ignoring the optimizer states. It is recommended to use Torch distributed format to save intermediate checkpoints and to save a Hugging Face checkpoint only at the end of training. 
-A checkpoint converter is provided to convert a Torch distributed checkpoint checkpoint to Hugging Face format after training: +## Converting Torch Distributed Checkpoints to Hugging Face Format + +A checkpoint converter is provided to convert a Torch distributed checkpoint to Hugging Face format after training: ```sh uv run examples/converters/convert_dcp_to_hf.py --config=<YAML CONFIG USED DURING TRAINING> <ANY CONFIG OVERRIDES USED DURING TRAINING> --dcp-ckpt-path=<PATH TO DIST CHECKPOINT TO CONVERT> --hf-ckpt-path=<WHERE TO SAVE HF CHECKPOINT> @@ -17,3 +19,13 @@ CKPT_DIR=results/sft/step_10 uv run examples/converters/convert_dcp_to_hf.py --config=$CKPT_DIR/config.yaml --dcp-ckpt-path=$CKPT_DIR/policy/weights --hf-ckpt-path=${CKPT_DIR}-hf rsync -ahP $CKPT_DIR/policy/tokenizer ${CKPT_DIR}-hf/ ``` + +## Converting Megatron Checkpoints to Hugging Face Format + +For models that were originally trained using the Megatron-LM backend, a separate converter is available to convert Megatron checkpoints to Hugging Face format. This script requires Megatron-Core, so make sure to launch the conversion with the `mcore` extra. For example, + +```sh +CKPT_DIR=results/sft/step_10 + +uv run --extra mcore examples/converters/convert_megatron_to_hf.py --config=$CKPT_DIR/config.yaml --megatron-ckpt-path=$CKPT_DIR/policy/weights/iter_0000000/ --hf-ckpt-path=<path_to_save_hf_ckpt> +``` diff --git a/docs/design-docs/design-and-philosophy.md b/docs/design-docs/design-and-philosophy.md index eec3b399a7..a2327c7428 100644 --- a/docs/design-docs/design-and-philosophy.md +++ b/docs/design-docs/design-and-philosophy.md @@ -1,6 +1,6 @@ # Design and Philosophy -This section introduces the NeMo RL APIs and addresses the challenges of online Reinforcement Learning (RL). Coordinating various software components, known as RL Actors, requires effective resource allocation, isolation, coordination, and communication. 
Our design philosophy focuses on creating modular abstractions for these tasks, ensuring scalability from one GPU to thousands, regardless of the RL Actor's implementation. +This section introduces the NeMo RL APIs, configuration patterns with TypedDicts, and addresses the challenges of online Reinforcement Learning (RL). Coordinating various software components, known as RL Actors, requires effective resource allocation, isolation, coordination, and communication. Our design philosophy focuses on creating modular abstractions for these tasks, ensuring scalability from one GPU to thousands, regardless of the RL Actor's implementation. ## Motivation @@ -112,3 +112,25 @@ def grpo_train( policy.train(generations, logprobs, reference_logprobs, GRPOLossFn) ``` For a complete implementation of GRPO, including validation, checkpointing, memory movement, and the data processing steps not detailed here, see [grpo_train](../../nemo_rl/algorithms/grpo.py). + + +### TypedDict and Configuration Defaults + +In NeMo RL, we use YAML files for configuration and load them with `omegaconf` into a recursive `dict`. Within the codebase, +the root `dict` and sub-`dict`s are typed with `TypedDict` subclasses to provide type hints when accessing attributes. This +allows our type checker to validate if an undocumented attribute is accessed when not present in the `TypedDict` subclass, +or to identify an incompatible type. + +We chose this design because it's simple and gives users the flexibility to use older configuration files without encountering errors during config loading due to unexpected attributes, whether obsolete or user defined. While we considered using dataclasses or other structured configuration formats, those approaches introduce more boilerplate and would require config versioning to support loading across different versions of NeMo RL. + +We follow a few design principles regarding configuration: + +1. 
We forbid defaults in the code, except in limited cases (e.g., alpha features). Defaults should be defined in YAML configuration files. Setting defaults in code makes it difficult to trace where values originate during debugging. + * Forbidden examples include: + * `grpo_config.get("num_prompts_per_step", 32)` + * `policy_config.get("model_name", "meta-llama/Llama-3.1-8B-Instruct")` + * Acceptable examples: + * If an attribute is typed `typing.NotRequired[...]`, it is okay for the code to check for absence/`None`, e.g., `assert "milestones" in scheduler_cfg` or `if "milestones" in scheduler_cfg` +1. All configs under [examples/configs/*.yaml](https://github.com/NVIDIA-NeMo/RL/tree/main/examples/configs) are exemplars and should contain the defaults for `typing.Required` or `typing.NotRequired` attributes, along with accompanying documentation. + * All configs under [examples/configs/recipes/**/*.yaml](https://github.com/NVIDIA-NeMo/RL/tree/main/examples/configs/recipes) do not require documentation and are snapshots of functional configurations. +1. All configs under [examples/configs/**/*.yaml](https://github.com/NVIDIA-NeMo/RL/tree/main/examples/configs) should adhere to their `TypedDict` subclass configuration. Unit tests in [tests/unit/test_config_validation.py](https://github.com/NVIDIA-NeMo/RL/blob/main/tests/unit/test_config_validation.py) are run to validate compliance. diff --git a/docs/design-docs/env-vars.md b/docs/design-docs/env-vars.md new file mode 100644 index 0000000000..da2bf2db6b --- /dev/null +++ b/docs/design-docs/env-vars.md @@ -0,0 +1,31 @@ +# Environment Variable Precedence in NeMo RL + +There are a number of ways to pass environment variables to Ray workers in NeMo RL. This document explains each of the methods and why they are useful. + +## Precedence Order + +### 1. Ray Runtime Environment Variables (lowest) +- Set via `ray.remote(runtime_env={'env_vars': {...}})` decorators. +- Applied to all instances of specific worker classes. 
These define the default environment variables for the class if not overwritten by a method of higher precedence. +- Example: `@ray.remote(runtime_env=get_runtime_env_for_policy_worker("megatron_policy_worker"))`. See [here](https://github.com/NVIDIA-NeMo/RL/blob/def76820d7838c63c1ee4900e63f73a93d927ff2/nemo_rl/models/policy/megatron_policy_worker.py#L338) where `get_runtime_env_for_policy_worker` will be applied to all instances of `MegatronPolicyWorker`. + +### 2. System-level Environment Variables (medium) +- Set via `export` in shell or `os.environ` in Python. +- Useful for controlling environment variables from a high level. If not overwritten by higher priority methods, all workers will inherit these environment variables. +- Example: `export HF_TOKEN=<your_token>` + +### 3. YAML Configuration `env_vars` (high) +- Set in YAML config files under `policy.megatron_cfg.env_vars` or `policy.dtensor_cfg.env_vars`. +- Useful for controlling environment variables on an experiment level. +- Example: + ```yaml + policy: + megatron_cfg: + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" + ``` + +### 4. Worker-specific `configure_worker` Method (highest) +- Set via static `configure_worker` method in worker classes. +- Applied to specific worker instances based on configuration. +- See an example in `VllmGenerationWorker` [here](https://github.com/NVIDIA-NeMo/RL/blob/def76820d7838c63c1ee4900e63f73a93d927ff2/nemo_rl/models/generation/vllm.py#L88). 
diff --git a/docs/design-docs/generation.md b/docs/design-docs/generation.md index 275625f371..6890f0b2ac 100644 --- a/docs/design-docs/generation.md +++ b/docs/design-docs/generation.md @@ -16,7 +16,7 @@ The core of the generation system is defined in `interfaces.py`, which establish max_new_tokens: int # Maximum number of tokens to generate temperature: float # Sampling temperature top_p: float # Top-p sampling parameter - top_k: int # Top-k sampling parameter + top_k: int | None # Top-k sampling parameter model_name: str # Name or path of the model ``` @@ -62,7 +62,7 @@ A key design principle for generation backends is that they process tokens direc ## VLLM Backend -The VLLM backend (`models/generation/vllm.py`) implements the {py:class}`GenerationInterface <nemo_rl.models.generation.interfaces.GenerationInterface>` to provide efficient text generation using the VLLM library, which is optimized for large language models. +The VLLM backend (`models/generation/vllm/vllm_generation.py`) implements the {py:class}`GenerationInterface <nemo_rl.models.generation.interfaces.GenerationInterface>` to provide efficient text generation using the VLLM library, which is optimized for large language models. ### VllmGeneration Class diff --git a/docs/design-docs/logger.md b/docs/design-docs/logger.md index 1c45529540..dad321c6a1 100644 --- a/docs/design-docs/logger.md +++ b/docs/design-docs/logger.md @@ -1,6 +1,6 @@ # Logger -The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB, Tensorboard, and MLflow. +The logger is designed to track key training metrics (including distributed metrics with reductions and timing), as well as providing integration with logging backends like WandB, Tensorboard, MLflow and Swanlab. 
## Requirements @@ -10,12 +10,13 @@ The logger is designed to track key training metrics (including distributed metr * WandB * Tensorboard * MLflow + * Swanlab ## Overall Design Since there is a single controller, the single process running the main training loop will gather the metrics and do the logging. -To handle multiple logger backends, we will have a {py:class}`LoggerInterface <nemo_rl.utils.logger.LoggerInterface>` interface that the {py:class}`TensorboardLogger <nemo_rl.utils.logger.TensorboardLogger>`, {py:class}`WandbLogger <nemo_rl.utils.logger.WandbLogger>`, and {py:class}`MLflowLogger <nemo_rl.utils.logger.MLflowLogger>` will implement: +To handle multiple logger backends, we will have a {py:class}`LoggerInterface <nemo_rl.utils.logger.LoggerInterface>` interface that the {py:class}`TensorboardLogger <nemo_rl.utils.logger.TensorboardLogger>`, {py:class}`WandbLogger <nemo_rl.utils.logger.WandbLogger>`, {py:class}`MLflowLogger <nemo_rl.utils.logger.MLflowLogger>` and {py:class}`SwanlabLogger <nemo_rl.utils.logger.SwanlabLogger>` will implement: ```python class LoggerInterface(ABC): @@ -35,7 +36,7 @@ class LoggerInterface(ABC): A {py:class}`Logger <nemo_rl.utils.logger.Logger>` wrapper class will also implement {py:class}`LoggerInterface <nemo_rl.utils.logger.LoggerInterface>` and maintain a list of loggers to which it delegates writing logs. This will be the main class the user uses in the training loop. 
Usage example: ```python -# Initialize logger with wandb, tensorboard, and mlflow enabled +# Initialize logger with wandb, tensorboard, mlflow and swanlab enabled logging_config = { "wandb_enabled": True, "tensorboard_enabled": False, @@ -45,6 +46,10 @@ logging_config = { "project": "grpo-dev", "name": "grpo-dev-logging", }, + "swanlab": { + "project": "nemo-rl", + "name": "grpo-dev-logging", + }, "tensorboard": { "log_dir": "logs", }, @@ -74,6 +79,13 @@ The logger supports three main logging backends: - Includes built-in hyperparameter logging - Offers rich visualization and collaboration features +### Swanlab +- Training visualization (Android, iOS, Wechat public account and Web) +- Automatic logging +- Hyperparameter recording +- Experiment comparison +- Multi-user collaboration + ### Tensorboard - Local file-based logging - Standard TensorBoard visualization @@ -121,6 +133,7 @@ The logger supports pretty-formatted logging of validation samples to help visua ```python logger: wandb_enabled: false + swanlab_enabled: false tensorboard_enabled: false mlflow_enabled: false num_val_samples_to_print: 10 @@ -140,7 +153,7 @@ When enabled, the pretty logging will generate formatted text similar to: ## GPU Metric Logging -NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard, WandB, and/or MLflow. +NeMo RL monitors GPU memory and utilization through [system metrics](https://docs.ray.io/en/latest/ray-observability/reference/system-metrics.html#system-metrics) exposed by Ray nodes. While Ray makes these metrics available for tools like Prometheus, NeMo RL directly polls GPU memory and utilization data and logs them to TensorBoard, WandB, MLflow and/or SwanLab. 
This approach allows us to offer the same GPU metric tracking on all loggers and simplifies the implementation greatly. @@ -149,6 +162,7 @@ This feature is enabled with the `monitor_gpus` configuration parameter. The fre ```python logger: wandb_enabled: false + swanlab_enabled: false tensorboard_enabled: false mlflow_enabled: false monitor_gpus: true @@ -157,13 +171,12 @@ logger: flush_interval: 10 ``` -:::{note} -While it is feasible to monitor using remote workers, the implementation requires careful attention to details to ensure: -* Logs sent back to the driver do not introduce significant overhead. -* Metrics remain clear and interpretable, avoiding issues like double counting caused by colocated workers. -* Workers can gracefully flush their logs in case of failure. -* Logging behaves consistently across TensorBoard, WandB, and MLflow. -* Workers that spawn other workers accurately report the total resource usage of any grandchild workers. - -Due to these complexities, we opted for a simpler approach: collecting metrics exposed by the Ray metrics server from the driver. -::: \ No newline at end of file +> [!NOTE] +> While it is feasible to monitor using remote workers, the implementation requires careful attention to details to ensure: +> * Logs sent back to the driver do not introduce significant overhead. +> * Metrics remain clear and interpretable, avoiding issues like double counting caused by colocated workers. +> * Workers can gracefully flush their logs in case of failure. +> * Logging behaves consistently across TensorBoard, WandB, MLflow and Swanlab. +> * Workers that spawn other workers accurately report the total resource usage of any grandchild workers. +> +> Due to these complexities, we opted for a simpler approach: collecting metrics exposed by the Ray metrics server from the driver. 
diff --git a/docs/docker.md b/docs/docker.md index 1157e92ebc..f6f93fc1b8 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -1,39 +1,50 @@ # Build Docker Images -This guide provides three methods for building Docker images: +This guide provides two methods for building Docker images: * **release**: Contains everything from the hermetic image, plus the nemo-rl source code and pre-fetched virtual environments for isolated workers. * **hermetic**: Includes the base image plus pre-fetched NeMo RL python packages in the `uv` cache. -* **base**: A minimal image with CUDA, `ray`, and `uv` installed, ideal for specifying Python dependencies at runtime. Use the: * **release** (recommended): if you want to pre-fetch the NeMo RL [worker virtual environments](./design-docs/uv.md#worker-configuration) and copy in the project source code. * **hermetic**: if you want to pre-fetch NeMo RL python packages into the `uv` cache to eliminate the initial overhead of program start. -* **base**: if you just need a minimal image with CUDA, `ray`, and `uv` installed and are okay with dynamically downloading your requirements at runtime. This option trades off fast container download/startup with slower initial overhead to download python packages. ## Release Image The release image is our recommended option as it provides the most complete environment. It includes everything from the hermetic image, plus the nemo-rl source code and pre-fetched virtual environments for isolated workers. This is the ideal choice for production deployments. ```sh -cd docker/ -docker buildx build --target release -t nemo_rl -f Dockerfile .. +# Self-contained build (default: builds from main): +docker buildx build --target release -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . + +# Self-contained build (specific git ref): +docker buildx build --target release -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push . 
+ +# Self-contained build (remote NeMo RL source; no need for a local clone of NeMo RL): +docker buildx build --target release -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push https://github.com/NVIDIA-NeMo/RL.git + +# Local NeMo RL source override: +docker buildx build --target release --build-context nemo-rl=. -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . ``` +**Note:** The `--tag <registry>/nemo-rl:latest --push` flags are not necessary if you just want to build locally. + ## Hermetic Image The hermetic image includes all Python dependencies pre-downloaded in the `uv` cache, eliminating the initial overhead of downloading packages at runtime. This is useful when you need a more predictable environment or have limited network connectivity. ```sh -cd docker/ -docker buildx build --target hermetic -t nemo_rl -f Dockerfile .. -``` +# Self-contained build (default: builds from main): +docker buildx build --target hermetic -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . -## Base Image +# Self-contained build (specific git ref): +docker buildx build --target hermetic -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push . -The base image provides a minimal environment with CUDA, `ray`, and `uv` installed. While it's the smallest image, it requires downloading Python dependencies at runtime, which may not be ideal for all use cases. +# Self-contained build (remote NeMo RL source; no need for a local clone of NeMo RL): +docker buildx build --target hermetic -f docker/Dockerfile --build-arg NRL_GIT_REF=r0.3.0 --tag <registry>/nemo-rl:r0.3.0 --push https://github.com/NVIDIA-NeMo/RL.git -```sh -cd docker/ -docker buildx build --target base -t nemo_rl -f Dockerfile .. +# Local NeMo RL source override: +docker buildx build --target hermetic --build-context nemo-rl=. -f docker/Dockerfile --tag <registry>/nemo-rl:latest --push . 
``` + +**Note:** The `--tag <registry>/nemo-rl:latest --push` flags are not necessary if you just want to build locally. diff --git a/docs/documentation.md b/docs/documentation.md index 69145c06af..ccdde6d024 100644 --- a/docs/documentation.md +++ b/docs/documentation.md @@ -22,6 +22,20 @@ uv run --group docs sphinx-build . _build/html * The resulting HTML files are generated in a `_build/html` folder that is created under the project `docs/` folder. * The generated python API docs are placed in `apidocs` under the `docs/` folder. +## Checking for Broken Links + +To check for broken http links in the docs, run this command: + +```sh +cd docs/ +uv run --group docs sphinx-build --builder linkcheck . _build/linkcheck +``` + +It will output a JSON file at `_build/linkcheck/output.json` with links it found while building the +docs. Records will have a status of `broken` if the link is not reachable. The `docs/conf.py` file is +configured to ignore github links because the CI test will often experience rate limit errors. +Comment out the `linkcheck_ignore` variable there to check all the links. + ## Live Building When writing documentation, it can be helpful to serve the documentation and have it update live while you edit. diff --git a/docs/fp8.md b/docs/fp8.md new file mode 100644 index 0000000000..1459cd9b8d --- /dev/null +++ b/docs/fp8.md @@ -0,0 +1,97 @@ +# FP8 Quantization in NeMo RL + +This module provides a suite of tools to enable FP8 quantization for large language models. It is currently under active development. + +## Supported Features + +### FP8 Generation +- Implements **Deepseek-style FP8** quantization using **sub-channel scaling**. + +### FP8 Training +- Uses **TransformerEngine** for linear layer implementation. +- Supports both **Deepseek-style sub-channel scaling** and **per-tensor scaling**. + +## Integration with NeMo RL + +NeMo RL applies monkey patches to several core `vLLM` components to enable FP8 generation for reinforcement learning. 
+When the `init_fp8` function is called, it modifies the following:
+
+### RayDistributedExecutor
+- For multi-GPU inference, the executor is patched to ensure that every worker process applies the same FP8 patches **before model initialization**.
+
+### Quantization Utilities
+- Functions within `vllm.model_executor.layers.quantization` are replaced with custom implementations that support:
+  - **Power-of-2 scaling**
+  - Other custom features
+
+### Weight Loading
+- A custom `load_weights` function performs on-the-fly quantization of model weights from higher-precision formats to FP8.
+
+
+## Usage
+
+We recommend configuring FP8 generation with the following settings:
+
+```yaml
+loss_fn:
+  # importance sampling helps improve stability
+  use_importance_sampling_correction: true
+
+policy:
+  generation:
+    vllm_cfg:
+      precision: 'fp8'
+      # DeepGemm is much more performant than vLLM's default cutlass fp8 subchannel scaling kernels
+      use_deep_gemm: true
+      # Users can specify the number of layers to be kept in BF16 precision in their experiments;
+      # by default they are set to 0
+      num_last_layers_in_bf16: 0
+      num_first_layers_in_bf16: 0
+      # Use FP32 scaling factors. Rounding scaling factors to the nearest pow2 may improve quantization
+      # fidelity; however, this feature is still under research.
+      use_weight_pow2_scale: false
+      use_activation_pow2_scale: false
+```
+
+To train with FP8, you need to set the Megatron path and configure it using the following settings:
+
+```yaml
+policy:
+  megatron_cfg:
+    fp8_cfg:
+      fp8: "hybrid" # choices: [hybrid, e4m3]
+      fp8_recipe: "tensorwise" # choices: [tensorwise, blockwise]
+      fp8_param: false # boolean value
+```
+
+## Compatibility Note for Deepseek-Style FP8 Training
+
+When using FP8 training with Deepseek-style FP8 (sub-channel scaling), be aware of the following compatibility issue:
+
+The TransformerEngine implementation for this recipe requires **cuBLAS version ≥ 12.9**.
However, `nemo-rl` currently depends on **Torch 2.7.1**, which in turn requires **CUDA 12.8**. As a result, attempting to use the default setup will trigger the following error:
+
+```
+File "/opt/ray_venvs/nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker/lib/python3.12/site-packages/transformer_engine/pytorch/fp8.py", line 646, in fp8_autocast
+FP8GlobalStateManager.fp8_autocast_enter(
+File "/opt/ray_venvs/nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker/lib/python3.12/site-packages/transformer_engine/pytorch/fp8.py", line 465, in fp8_autocast_enter
+assert fp8_block_available, reason_for_no_fp8_block
+       ^^^^^^^^^^^^^^^^^^^
+AssertionError: FP8 block scaled GEMM requires Hopper and CUDA >= 12.9.
+```
+This issue will be resolved once the Torch version is upgraded to **≥ 2.8.0** (please follow [#1122](https://github.com/NVIDIA-NeMo/RL/issues/1122) for progress on the upgrade). In the meantime, you can enable Deepseek-style FP8 training using the following workaround:
+
+- **Build the NGC PyTorch container** from `docker/Dockerfile.ngc_pytorch`.
+  This setup uses the system Python environment, which includes **CUDA version 12.9 or higher**, meeting the requirements for TransformerEngine’s FP8 implementation.
+
+
+
+## Accuracy
+
+![Llama-3.1-8B-Instruct GRPO Curve BF16 vs FP8](assets/fp8_e2e_curve.png)
+
+The above results are from Llama-3.1-8B-Instruct GRPO experiments. You can run them with the following example configs:
+* For BF16: `examples/configs/grpo_math_8B_megatron.yaml`
+* For FP8: `examples/configs/grpo_math_8B_megatron_fp8.yaml`
+
+In the experiment shown in this figure, enabling FP8 rollout and training gives a 15%-25% decrease in step time, and the validation accuracy curves match up to 1000 steps.
+Efforts are ongoing to perform longer runs and further optimize performance.
diff --git a/docs/guides/async-grpo.md b/docs/guides/async-grpo.md new file mode 100644 index 0000000000..0beac8204a --- /dev/null +++ b/docs/guides/async-grpo.md @@ -0,0 +1,208 @@ +# Train with Async GRPO + +Async GRPO is an asynchronous training mode that allows trajectory generation and policy training to run concurrently, improving GPU utilization and throughput compared to synchronous GRPO. + +## Configure Async GRPO + +This section covers how to configure async GRPO by modifying your settings and includes a complete example configuration. +### Enable Async GRPO + +To use async GRPO, make these configuration changes: + +1. **Enable vLLM async engine**: +```yaml +policy: + generation: + backend: "vllm" + vllm_cfg: + async_engine: true +``` + +2. **Enable importance sampling correction** (required for convergence): +```yaml +loss_fn: + use_importance_sampling_correction: true +``` + +3. **Disable colocated inference** (required for async mode): +```yaml +policy: + generation: + colocated: + enabled: false + resources: + num_nodes: 1 # or more + gpus_per_node: 2 # adjust based on your setup +``` + +4. 
**Add async GRPO configuration**: +```yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 # Maximum age, in training steps, for trajectories + in_flight_weight_updates: false # Enable for faster weight synchronization + recompute_kv_cache_after_weight_updates: false # Invalidates kv cache after in-flight-weight-updates +``` + +### Complete Example Config +```yaml +policy: + generation: + backend: "vllm" + colocated: + enabled: false + resources: + num_nodes: 1 + gpus_per_node: 2 + vllm_cfg: + async_engine: true + +loss_fn: + use_importance_sampling_correction: true + +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 4 + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: false # Enable for faster weight synchronization + recompute_kv_cache_after_weight_updates: false # Invalidates kv cache after in-flight-weight-updates + +cluster: + num_nodes: 2 + gpus_per_node: 4 +``` + +## Implementation Structure +This section covers the internal architecture of async GRPO and includes detailed explanations of how the core components interact. +### Core Components + +The async GRPO implementation consists of three main components: + +#### 1. Main Training Loop (`async_grpo_train` in `grpo.py`) +- Coordinates overall training process +- Samples trajectories from replay buffer +- Runs policy training steps +- Handles validation and checkpointing +- Manages weight synchronization between training and generation + +#### 2. Async Trajectory Collector (`AsyncTrajectoryCollector` in `async_utils.py`) +- Runs in background Ray actor +- Continuously generates trajectories using current policy weights +- Manages generation scheduling and weight version tracking +- Handles pause/resume for weight updates and validation +- Coordinates with replay buffer for trajectory storage + +#### 3. 
Replay Buffer (`ReplayBuffer` in `async_utils.py`) +- Stores generated trajectories with metadata +- Tracks weight versions for both generation and intended training use +- Implements age-based filtering to prevent stale trajectories +- Provides sampling interface for training steps + +### Weight Version Tracking + +Async GRPO uses a weight versioning system: +- **Generation Weight Version**: The policy weights used to generate a trajectory +- **Target Weight Version**: The training step where the trajectory will be used +- **Max Trajectory Age**: How many steps old a trajectory can be before being discarded + +Example with `max_trajectory_age_steps: 1`: +- Trajectory generated with weights v10 can be used for training steps v10 or v11 +- At training step v12, trajectories from v10 are too old and discarded + +### Coordination Flow + +1. **Startup**: Trajectory collector starts generating trajectories in background +2. **Buffer Fill**: Training waits until buffer has sufficient trajectories +3. **Training Step**: + - Sample trajectories from buffer + - Run policy training + - Update weights and notify collector +4. **Weight Sync**: Collector pauses, waits for weight refit, then resumes +5. 
**Repeat**: Process continues with updated weights
+
+
+### Architecture Diagram
+
+The following sequence diagram illustrates the interactions between the three main components:
+
+```
+sequenceDiagram
+    participant Training as Training Loop
+    participant Collector as Trajectory Collector
+    participant Buffer as Replay Buffer
+
+    Note over Training, Buffer: Startup
+    Training->>Collector: Start generation
+    Training->>Buffer: Initialize
+
+    Note over Training, Buffer: Main Loop
+    loop Async Training
+        par Background Generation
+            Collector->>Buffer: Store trajectories
+        and Training Steps
+            Training->>Buffer: Sample trajectories
+            Buffer-->>Training: Return valid data
+            Training->>Training: Update policy weights
+            Training->>Collector: Sync new weights
+        end
+    end
+```
+
+## Usage Tips
+
+1. **Buffer Sizing**: The replay buffer size is automatically calculated as:
+   ```
+   buffer_size = num_prompts_per_step × max_trajectory_age_steps × 2
+   ```
+
+2. **Age Limits**: Start with `max_trajectory_age_steps: 1` and increase it if needed for higher throughput.
+
+3. **Resource Allocation**: Ensure sufficient GPU memory for both the training and generation clusters.
+
+4. **In-Flight Weight Updates**: Enable `in_flight_weight_updates: true` when using `async_engine: true` to update the weights of the vLLM engine during generation. This prevents stalling the training pipeline until the longest generation finishes and provides significant performance benefits.
+
+5. **Recompute KV Cache After Weight Updates**: When using in-flight weight updates, users can choose whether to recompute
+KV caches after a weight update via the `recompute_kv_cache_after_weight_updates` configuration option.
+ +## Why Importance Sampling Correction Is Required for Async + +### The GRPO Objective + +The standard GRPO loss function (without KL penalty) is: + +$$ +L(\theta) = E_{x \sim \pi_{\theta_{\text{old}}}} \Big[ \min \Big(\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}A_t, \text{clip} \big( \frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}, 1 - \varepsilon, 1 + \varepsilon \big) A_t \Big) \Big] +$$ + +where: +- $\pi_\theta$ is the policy model we are currently optimizing +- $\pi_{\theta_{\text{old}}}$ is the previous policy model (from the beginning of this step) +- $A_t$ is the advantage estimate +- $\varepsilon$ is a clipping hyperparameter + +In standard GRPO, we assume trajectories are sampled from $\pi_{\theta_{\text{old}}}$. However, in async GRPO, trajectories are actually sampled from $\pi_{\theta_{\text{generator}}}$, which is the policy weights from N training steps ago (where N ≥ 1 depending on `max_trajectory_age_steps`). + +Without importance sampling correction, the GRPO objective becomes fundamentally incorrect: + +1. **Incorrect probability ratios**: The ratio $\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}$ uses $\pi_{\theta_{\text{old}}}$ probabilities that were never actually used to generate the trajectories. + +2. **Biased gradient estimates**: Since we're computing gradients based on samples from the wrong distribution, the policy updates become biased and can lead to instability. + +When we enable importance sampling correction (`use_importance_sampling_correction: true`), we introduce the corrective term: + +$$ +\frac{\pi_{\text{training}}(x)}{\pi_{\text{generator}}(x)} +$$ + +This transforms our loss function to properly account for the distribution mismatch. 
The corrected objective becomes: + +$$ +L(\theta) = E_{x \sim \pi_{\theta_{\text{generator}}}} \Big[ \frac{\pi_{\text{training}}(x)}{\pi_{\text{generator}}(x)} \min \Big(\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}A_t, \text{clip} \big( \frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}, 1 - \varepsilon, 1 + \varepsilon \big) A_t \Big) \Big] +$$ + +The importance sampling ratio $\frac{\pi_{\text{training}}(x)}{\pi_{\text{generator}}(x)}$ is effectively $\frac{\pi_{\theta_{\text{old}}}(x)}{\pi_{\theta_{\text{generator}}}(x)}$, which corrects for the N-step gap between the generator policy and the policy we assume we're sampling from. + +This correction ensures that we have unbiased gradient estimates and stable convergence. diff --git a/docs/guides/dapo.md b/docs/guides/dapo.md new file mode 100644 index 0000000000..fe5dbb6c30 --- /dev/null +++ b/docs/guides/dapo.md @@ -0,0 +1,100 @@ +# An in-depth Walkthrough of DAPO in NeMo RL + +This guide covers the [Decoupled Clip and Dynamic Sampling Policy Optimization (DAPO)](https://arxiv.org/pdf/2503.14476) implementation in NeMo RL. + +DAPO introduces four key improvements over Group Relative Policy Optimization (GRPO): +1. **Clip-Higher**, which promotes the diversity of the system and avoids entropy collapse +2. **Dynamic Sampling**, which improves training efficiency and stability +3. **Token-Level Policy Gradient Loss**, which is critical in long-CoT RL scenarios +4. **Overlong Reward Shaping**, which reduces reward noise and stabilizes training + +This document focuses on DAPO-specific features: Dynamic Sampling and Overlong Reward Shaping. For foundational concepts on GRPO including data handling, policy training, generation, and loss functions, see the [NeMo RL GRPO Guide](grpo.md). + + +## Quickstart: Launch a DAPO Run + +To get started quickly, use the example configuration [examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml](../../examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml). 
You can launch this using the same script as GRPO: + +```bash +uv run examples/run_grpo_math.py --config examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml {overrides} +``` + +**Reminder**: Don't forget to set your HF_HOME, WANDB_API_KEY, and HF_DATASETS_CACHE (if needed). You'll need to do a `huggingface-cli login` as well for LLaMA models. + +## Dynamic Sampling + +Standard GRPO trains on all generated responses, even when they have identical rewards (zero gradient signal) within a prompt group of generations. Dynamic sampling filters to keep only groups with diverse rewards (`std > 0`), and accumulates them across batches until reaching the target batch size. Dynamic sampling can be enabled by setting `use_dynamic_sampling=True` in your configuration. For implementation details, see the [`dynamic_sampling`](../../nemo_rl/algorithms/grpo.py) function. + +**Algorithm**: For each training step: + +1. Sample `batch_multiplier × num_prompts_per_step` prompts from the dataset. The default value of `batch_multiplier` is 1. +2. Generate `num_generations_per_prompt` responses per prompt and compute rewards. +3. Compute the baseline and standard deviation for each prompt group. +4. Filter prompt groups where `std > 0`. +5. Store these prompts in a cache until reaching the target training batch size of `num_prompts_per_step × num_generations_per_prompt` samples. +6. Samples are accumulated until the maximum number of allowed batches (`dynamic_sampling_max_gen_batches`) is reached. If the cache still does not meet the target rollout batch size at that point, an error is raised. To resolve this, consider adjusting parameters such as `num_prompts_per_step` or `num_generations_per_prompt` to increase sample diversity, or revisit the complexity of your data. +7. 
Perform training on the collected samples with nonzero standard deviation + +### About batch_multiplier + +`batch_multiplier` (a float ≥ 1.0) controls the initial prompt pool size by sampling `batch_multiplier × num_prompts_per_step` prompts before dynamic sampling. Higher values increase memory and compute requirements, while very low values (e.g., 1.0) may slow the cache accumulation of prompt groups with nonzero standard deviation. The optimal value depends on the dataset, model capacity, and overall training setup. When **dynamic sampling** is enabled, we also log two additional metrics: + + * `dynamic_sampling_num_gen_batches`: The number of generation rounds required to produce `num_prompts_per_step * num_generations_per_prompt` samples with a nonzero standard deviation. If this number remains consistently high across iterations, try increasing the `batch_multiplier`. The maximum allowed value for this parameter is determined by `dynamic_sampling_max_gen_batches`. + * `dynamic_sampling_num_discarded_valid_samples`: The number of samples with a nonzero standard deviation that are discarded because the total exceeds `num_prompts_per_step * num_generations_per_prompt`. If this value is frequently high (e.g., above `0.5 * num_prompts_per_step * num_generations_per_prompt`) and `dynamic_sampling_num_gen_batches` is consistently 1, it suggests that a large fraction of the dataset is being discarded unnecessarily. To improve data efficiency, consider decreasing the `batch_multiplier`. + +## Reward Shaping +DAPO introduces an overlong reward shaping mechanism to reduce reward noise and stabilize training. This approach penalizes responses that exceed a specified length threshold, helping to prevent the model from generating excessively long outputs while maintaining solution quality. + +For a detailed explanation of the overlong reward shaping mechanism, please refer to Section 3.4 of the [DAPO paper](https://arxiv.org/pdf/2503.14476). 
For implementation details, see the [`apply_reward_shaping`](../../nemo_rl/algorithms/reward_functions.py) function. + +## Configuration + +```yaml +grpo: + use_dynamic_sampling: true # Enable DAPO dynamic sampling + num_prompts_per_step: 512 # Target number of prompts per training step + num_generations_per_prompt: 16 # Generations per prompt + batch_multiplier: 3 # Dataloader batch size = batch_multiplier × num_prompts_per_step + dynamic_sampling_max_gen_batches: 10 # Maximum number of batches to be used for accumulating non-zero std prompts + reward_scaling: + enabled: true + source_min: 0.0 + source_max: 1.0 + target_min: -1.0 + target_max: 1.0 + + reward_shaping: + enabled: true + overlong_buffer_length: 4096 # Threshold before penalties apply (paper uses 4096) + overlong_buffer_penalty: 1.0 # Penalty per excess token + max_response_length: 20480 # Hard maximum generation length +``` + +**Key Parameters:** +- **`use_dynamic_sampling`**: When enabled, activates DAPO's dynamic sampling algorithm to filter and accumulate prompt groups with nonzero standard deviation +- **`batch_multiplier`**: Factor that scales the initial prompt pool size for sampling. +- **`dynamic_sampling_max_gen_batches`**: Maximum number of batches to be used for accumulating nonzero standard deviation prompts. +- **`reward_scaling`**: When enabled, clamps each reward in the batch to [source_min, source_max] and linearly rescales it to [target_min, target_max]. Defaults: source_min=0.0, source_max=1.0, target_min=0.0, target_max=1.0. +- **`reward_shaping`**: When enabled, applies the overlong penalty mechanism described in the Reward Shaping section above. Responses exceeding `max_response_length - overlong_buffer_length` receive penalties proportional to their excess length, helping to reduce reward noise and stabilize training. + +> [!NOTE] +> When dynamic sampling is enabled, monitor the `filtered_reward` metric to track the average reward of the prompts with std > 0. 
+ +> [!NOTE] +> **Clip-Higher** and **Token-Level Policy Gradient Loss** are already supported in NeMo RL and can be configured through the `loss_fn` section of your experiment config: +> - Set `ratio_clip_max` to enable Clip-Higher (e.g., `ratio_clip_max: 0.28`) +> - Set `token_level_loss: true` to enable Token-Level Policy Gradient Loss +> +> See the full [DAPO example config](../../examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml) for reference. + +## Example Training Results +Using the [DAPO example config](../../examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml), you can expect to see intermediate plots such as the training reward curve and validation accuracy on AIME24 for Qwen/Qwen2.5-Math-7B. These plots serve as reference outputs to help verify reproducibility. They are not intended to reflect the best accuracy that can be achieved using DAPO for this model. + +![DAPO Qwen2.5-7B Training Reward](../assets/dapo_train_reward.png) +![DAPO Qwen2.5-7B Validation Accuracy](../assets/dapo_val_acc.png) + +## References + +- **DAPO Paper**: [Decoupled Clip and Dynamic Sampling Policy Optimization](https://arxiv.org/pdf/2503.14476) +- **GRPO Paper**: [Group Relative Policy Optimization](https://arxiv.org/abs/2402.03300) +- **[NeMo RL GRPO Guide](grpo.md)** \ No newline at end of file diff --git a/docs/guides/dpo.md b/docs/guides/dpo.md index 46c4d56197..f00dde9f12 100644 --- a/docs/guides/dpo.md +++ b/docs/guides/dpo.md @@ -32,130 +32,97 @@ uv run examples/run_dpo.py \ ## Datasets -Each class representing a NeMo RL DPO dataset is expected to have the following attributes: -1. `formatted_ds`: The dictionary of formatted datasets. This dictionary should contain `train` and `validation` splits, and each split should conform to the format described below. -2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. 
- -DPO datasets are expected to follow a specific format with three key fields: -- `prompt`: The input prompt/context -- `chosen_response`: The preferred/winning response -- `rejected_response`: The non-preferred/losing response - -[data/hf_datasets/helpsteer3.py](../../nemo_rl/data/hf_datasets/helpsteer3.py) provides an example of how to format data for DPO: - -```python -def format_helpsteer3(data): - response_1 = data["response1"] - response_2 = data["response2"] - overall_preference = data["overall_preference"] - - if overall_preference < 0: - chosen = response_1 - rejected = response_2 - elif overall_preference == 0: - chosen = response_1 - rejected = response_1 - else: - chosen = response_2 - rejected = response_1 - - return { - "prompt": data["context"], - "chosen_response": chosen, - "rejected_response": rejected, +Each DPO dataset class is expected to have the following attributes: +1. `formatted_ds`: The dictionary of formatted datasets, where each dataset should be formatted like +```json +{ + "context": [], // list of dicts - The prompt message (including previous turns, if any) + "completions": [ // list of dicts — The list of completions + { + "rank": 0, // int — The rank of the completion (lower rank is preferred) + "completion": [] // list of dicts — The completion message(s) + }, + { + "rank": 1, // int — The rank of the completion (lower rank is preferred) + "completion": [] // list of dicts — The completion message(s) } + ] +} ``` +2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. -We also provide a [DPODataset](../../nemo_rl/data/hf_datasets/dpo.py) class that is compatible with jsonl-formatted preference datsets. This class assumes train and validation datasets have been split and processed into the expected format offline. The jsonl files should consist of examples with `prompt`, `chosen_response`, and `rejected_response` keys. 
- -## Adding Custom DPO Datasets - -Adding a new DPO dataset is straightforward. Your custom dataset class should: -1. Implement the required format conversion in the constructor -2. Set up the appropriate `task_spec` - -Here's a minimal example which simply re-keys an existing jsonl dataset: - -```{testcode} -from datasets import load_dataset -from nemo_rl.data.interfaces import TaskDataSpec -from docs.helpers import make_dpo_dataset - -class CustomDPODataset: - def preprocess_dataset( - self, - data, - prompt_key: str = "context", - chosen_key: str = "chosen", - rejected_key: str = "rejected" - ): - return { - "prompt": data[prompt_key], - "chosen_response": data[chosen_key], - "rejected_response": data[rejected_key], +DPO training supports only two completions (where the lowest rank is preferred and the highest one is rejected), with each completion being a single response. For example: +```json +{ + "context": [ + { + "role": "user", + "content": "What's the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." + }, + { + "role": "user", + "content": "Thanks! And what's the capital of Germany?" } - - def __init__( - self, - train_data_path: str, - val_data_path: str, - prompt_key: str, - chosen_key: str, - rejected_key: str, - ): - # Load and format your dataset - fn_kwargs={ - "prompt_key": prompt_key, - "chosen_key": chosen_key, - "rejected_key": rejected_key - } - formatted_ds = { - "train": load_dataset("json", data_files=train_data_path, split="train").map( - self.preprocess_dataset, - fn_kwargs=fn_kwargs, - ), - "validation": load_dataset("json", data_files=val_data_path, split="train").map( - self.preprocess_dataset, - fn_kwargs=fn_kwargs, - ), + ], + "completions": [ + { + "rank": 0, + "completion": [ + { + "role": "assistant", + "content": "The capital of Germany is Berlin." + } + ] + }, + { + "rank": 1, + "completion": [ + { + "role": "assistant", + "content": "The capital of Germany is Munich." 
+ } + ] } - - # Initialize task spec with dataset name - self.task_spec = TaskDataSpec( - task_name="custom_dpo", - ) - self.formatted_ds = formatted_ds - -# Create temporary files using helper function -train_file, val_file = make_dpo_dataset() - -# Initialize dataset -dataset = CustomDPODataset( - train_data_path=train_file.name, - val_data_path=val_file.name, - prompt_key="context", - chosen_key="chosen", - rejected_key="rejected" -) - -# Test dataset properties -print(f"Task name: {dataset.task_spec.task_name}") -print(f"Train examples: {len(dataset.formatted_ds['train'])}") -print(f"Validation examples: {len(dataset.formatted_ds['validation'])}") -print(f"First train example prompt: {dataset.formatted_ds['train'][0]['prompt']}") -print(f"First train example chosen response: {dataset.formatted_ds['train'][0]['chosen_response']}") -print(f"First train example rejected response: {dataset.formatted_ds['train'][0]['rejected_response']}") + ] +} ``` -```{testoutput} -Task name: custom_dpo -Train examples: 2 -Validation examples: 2 -First train example prompt: What is 2+2? -First train example chosen response: 4 -First train example rejected response: 5 +By default, NeMo RL has support for [HelpSteer3](../../nemo_rl/data/datasets/preference_datasets/helpsteer3.py) and [Tulu3Preference](../../nemo_rl/data/datasets/preference_datasets/tulu3.py) datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. + +We provide a [PreferenceDataset](../../nemo_rl/data/datasets/preference_datasets/preference_dataset.py) class that is compatible with jsonl-formatted preference datasets for loading datasets from local path or HuggingFace. 
You can modify your config as follows to use such a custom preference dataset: +```yaml +data: + dataset_name: PreferenceDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # multiple validation sets are supported + val_data_paths: + <NameOfValidationDataset>: <PathToValidationDataset1> + <NameOfValidationDataset2>: <PathToValidationDataset2> + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets ``` +We also provide a [BinaryPreferenceDataset](../../nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py) class, which is a simplified version of PreferenceDataset for pairwise ranked preference with single turn completions. You can use `prompt_key`, `chosen_key` and `rejected_key` to specify which fields in your data correspond to the question, chosen answer and rejected answer respectively. Here's an example configuration: +```yaml +data: + dataset_name: BinaryPreferenceDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + val_data_path: <PathToValidationDataset> + prompt_key: <PromptKey>, default is "prompt" + chosen_key: <ChosenKey>, default is "chosen" + rejected_key: <RejectedKey>, default is "rejected" + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets +``` + +Please note: +- If you are using a logger, the prefix used for each validation set will be `validation-<NameOfValidationDataset>`. The total validation time, summed across all validation sets, is reported under `timing/validation/total_validation_time`. +- If you are doing checkpointing, the `metric_name` value in your `checkpointing` config should reflect the metric and validation set to be tracked. 
For example, `validation-<NameOfValidationDataset1>_loss`. + ## DPO-Specific Parameters The DPO implementation in NeMo RL supports several key parameters that can be adjusted: diff --git a/docs/guides/environments.md b/docs/guides/environments.md new file mode 100644 index 0000000000..be025be164 --- /dev/null +++ b/docs/guides/environments.md @@ -0,0 +1,102 @@ +# Environments for GRPO Training + +GRPO supports several examples of environments for different tasks. Each environment provides a standardized interface for reward computation and evaluation. + +## Math Environment + +The Math Environment is designed for mathematical reasoning tasks. It evaluates responses to math problems using `math-verify` and provides rewards based on correctness. + +### Key Features +- Evaluates mathematical reasoning +- Supports multiple mathematical domains +- Provides detailed feedback on solution correctness + +### Usage +```python +from nemo_rl.environments.math_environment import MathEnvironment + +env_config = { + "num_workers": 2, +} + +math_env = MathEnvironment.remote(env_config) +``` + +## Code Environment + +The Code Environment is designed for code generation and execution tasks. It provides a sandboxed environment for executing Python code and evaluating the results. + +### Usage +```python +from nemo_rl.environments.code_environment import CodeEnvironment + +env_config = { + "num_workers": 2, + "terminate_on_evaluation": True, # Terminate after code execution +} + +code_env = CodeEnvironment.remote(env_config) +``` + +### Configuration +- `num_workers`: Number of parallel workers for code execution +- `terminate_on_evaluation`: Whether to terminate after code execution (True for single-turn, False for multi-turn) + +We’re tracking an end-to-end example of this environment in [#858](https://github.com/NVIDIA-NeMo/RL/issues/858). Add a 👍 to show your interest. 
+ +## Reward Model Environment + +The Reward Model Environment uses pre-trained reward models to score conversation quality. + +### Usage +```python +from nemo_rl.environments.reward_model_environment import RewardModelEnvironment + +env_config = { + "enabled": True, + "model_name": "Skywork/Skywork-Reward-V2-Qwen3-0.6B", + "tokenizer": {"name": "Skywork/Skywork-Reward-V2-Qwen3-0.6B"}, + "precision": "bfloat16", + "batch_size": 32, + "resources": {"gpus_per_node": 1, "num_nodes": 1}, + "reward_model_cfg": { + "enabled": True, + "reward_model_type": "bradley_terry", + }, +} + +reward_env = RewardModelEnvironment.remote(env_config) +``` + +### Resource Allocation in GRPO Training + +In GRPO training, resources are allocated across three main components: + +- **Policy Actor**: The trained model +- **Generation Actor**: Used for generating responses during rollouts (can be colocated with policy or on separate nodes/GPUs). +- **Reward Model Environment Actor**: Evaluates generated responses and computes rewards + +The resource allocation logic works as follows: + +#### Single-Node Setup (`num_nodes: 1`) +- All components share the same node +- GPUs are divided between policy training, generation, and reward model +- Example: + 1. Policy and generation colocated: 8 GPUs total = 4 for colocated policy and generation + 4 for reward model + 2. Policy and generation non-colocated: 8 GPUs total = 2 for policy + 2 for generation + 4 for reward model + +#### Multi-Node Setup (`num_nodes > 1`) +- Policy training, generation, and reward model environment can be distributed across different nodes +- Reward model gets dedicated resources as specified in `env.reward_model.resources` +- Generation gets dedicated resources as specified in `policy.generation.colocated.resources` +- Remaining nodes are allocated to policy training + +In the future, the resource control part will be refactored to enable fine-grained resource configuration for each actor. 
For detailed resource management and optimization strategies, see [#1100](https://github.com/NVIDIA-NeMo/RL/issues/1100). + +### Complete GRPO Training with Reward Model Environments + +See [examples/run_grpo_rm.py](../../examples/run_grpo_rm.py) for a complete example of using the reward model environment with GRPO training. + +### Configuration Examples + +See [examples/configs/grpo_rm_1B.yaml](../../examples/configs/grpo_rm_1B.yaml) for a complete configuration example. \ No newline at end of file diff --git a/docs/guides/eval.md b/docs/guides/eval.md index c82b4bff37..74eee7a004 100644 --- a/docs/guides/eval.md +++ b/docs/guides/eval.md @@ -4,10 +4,10 @@ This document explains how to use an evaluation script for assessing model capab ## Prepare for Evaluation -To prepare for evaluation, first ensure your model is in the correct format, which may involve an optional conversion of PyTorch DCP checkpoints to the Hugging Face format. Following this, you need to prepare the evaluation configuration, which includes defining prompt templates and any custom settings required to run the evaluation. +To prepare for evaluation, first ensure your model is in the correct format, which may involve an optional conversion of PyTorch DCP checkpoints to the HuggingFace format. Following this, you need to prepare the evaluation configuration, which includes defining prompt templates and any custom settings required to run the evaluation. ### Convert DCP to HF (Optional) -If you have trained a model and saved the checkpoint in the Pytorch DCP format, you first need to convert it to the Hugging Face format before running evaluation. +If you have trained a model and saved the checkpoint in the Pytorch DCP format, you first need to convert it to the HuggingFace format before running evaluation. Use the `examples/converters/convert_dcp_to_hf.py` script. 
You'll need the path to the training configuration file (`config.yaml`), the DCP checkpoint directory, and specify an output path for the HF format model. @@ -37,9 +37,9 @@ For open-source models, we recommend setting `tokenizer.chat_template=default`, ## Run the Evaluation Script -We will use the `run_eval.py` script to run an evaluation using a model directly from the Hugging Face Hub or from a local path that is already in Hugging Face format. +We will use the `run_eval.py` script to run an evaluation using a model directly from the HuggingFace Hub or from a local path that is already in HuggingFace format. -Note that the evaluation script only supports the Hugging Face format model. If you haven't converted your DCP format model, you should back to [Convert DCP to HF](#convert-dcp-to-hf-optional) and follow the guide to convert your model. +Note that the evaluation script only supports the HuggingFace format model. If you haven't converted your DCP format model, you should go back to [Convert DCP to HF](#convert-dcp-to-hf-optional) and follow the guide to convert your model. 
```sh # Run evaluation script with default config (examples/configs/evals/eval.yaml) @@ -48,14 +48,28 @@ uv run python examples/run_eval.py # Run evaluation script with converted model uv run python examples/run_eval.py generation.model_name=$PWD/results/grpo/hf +# Run evaluation script with Qwen3 model under thinking mode +uv run python examples/run_eval.py \ + generation.model_name=Qwen/Qwen3-8B \ + generation.temperature=0.6 \ + generation.top_p=0.95 \ + generation.top_k=20 \ + generation.vllm_cfg.max_model_len=38912 \ + tokenizer.chat_template_kwargs.enable_thinking=true \ + data.prompt_file=examples/prompts/cot.txt + # Run evaluation script with custom config file uv run python examples/run_eval.py --config path/to/custom_config.yaml # Run evaluation script on one of the supported benchmarks (e.g., GPQA) uv run python examples/run_eval.py --config examples/configs/evals/gpqa_eval.yaml -# Run evaluation script with a local dataset that is prefetched as a csv file. -uv run python examples/run_eval.py --config examples/configs/evals/local_eval.yaml +# Run evaluation script with a local dataset where the problem and solution keys are "Question" and "Answer" respectively. 
+uv run python examples/run_eval.py \ + --config examples/configs/evals/local_eval.yaml \ + data.dataset_name=/path/to/local/dataset \ + data.problem_key=Question \ + data.solution_key=Answer # Override specific config values via command line # Example: Evaluation of DeepScaleR-1.5B-Preview on MATH-500 using 8 GPUs @@ -79,9 +93,9 @@ When you complete the evaluation, you will receive a summary similar to the foll ``` ============================================================ model_name='Qwen2.5-Math-1.5B-Instruct' dataset_name='aime2024' -max_new_tokens=2048 temperature=0.0 top_p=1.0 top_k=-1 +max_new_tokens=2048 temperature=0.0 top_p=1.0 top_k=-1 seed=42 -metric='pass@k' pass_k_value=1 num_tests_per_prompt=1 +metric=pass@1 num_tests_per_prompt=1 score=0.1000 (3.0/30) ============================================================ @@ -89,9 +103,10 @@ score=0.1000 (3.0/30) ## List of currently supported benchmarks -- [AIME-2024](../../nemo_rl/data/eval_datasets/aime2024.py) -- [GPQA and GPQA-diamond](../../nemo_rl/data/eval_datasets/gpqa.py) -- [MATH and MATH-500](../../nemo_rl/data/eval_datasets/math.py) -- [MMLU](../../nemo_rl/data/eval_datasets/mmlu.py): this also includes MMMLU (Multilingual MMLU), a total of 14 languages. -- [MMLU-Pro](../../nemo_rl/data/eval_datasets/mmlu_pro.py) +- [AIME-2024 and AIME-2025](../../nemo_rl/data/datasets/eval_datasets/aime.py): the corresponding `data.dataset_name` are `"aime2024"` and `"aime2025"`. +- [GPQA and GPQA-diamond](../../nemo_rl/data/datasets/eval_datasets/gpqa.py): the corresponding `data.dataset_name` are `"gpqa"` and `"gpqa_diamond"`. +- [MATH and MATH-500](../../nemo_rl/data/datasets/eval_datasets/math.py): the corresponding `data.dataset_name` are `"math"` and `"math500"`. +- [MMLU](../../nemo_rl/data/datasets/eval_datasets/mmlu.py): this also includes MMMLU (Multilingual MMLU), a total of 14 languages. When `data.dataset_name` is set to `mmlu`, the English version is used. 
If one wants to run evaluation on another language, `data.dataset_name` should be set to `mmlu_{language}` where `language` is one of following 14 values, `["AR-XY", "BN-BD", "DE-DE", "ES-LA", "FR-FR", "HI-IN", "ID-ID", "IT-IT", "JA-JP", "KO-KR", "PT-BR", "ZH-CN", "SW-KE", "YO-NG"]`. +- [MMLU-Pro](../../nemo_rl/data/datasets/eval_datasets/mmlu_pro.py): the corresponding `data.dataset_name` is `"mmlu_pro"`. +More details can be found in [load_eval_dataset](../../nemo_rl/data/datasets/eval_datasets/__init__.py). diff --git a/docs/guides/grpo-deepscaler.md b/docs/guides/grpo-deepscaler.md index bc177f2fda..7b62025783 100644 --- a/docs/guides/grpo-deepscaler.md +++ b/docs/guides/grpo-deepscaler.md @@ -5,12 +5,12 @@ This guide explains how to use NeMo RL to train long Chain of Thought (CoT) reas ## Train the Model We follow the DeepScaleR recipe and train the model in three stages. In the first stage, we train with an 8K context window. In the second stage, we train with a 16K context window. In the third stage, we train with a 24K context window. -To train the model using NeMo RL, use the `examples/configs/grpo-deepscaler-1.5b-8K.yaml` config file. This file closely matches the experiment settings in the original DeepScaleR recipe. We then train with `examples/configs/grpo-deepscaler-1.5b-16K.yaml` and `examples/configs/grpo-deepscaler-1.5b-24K.yaml` for the second and third stages, respectively. +To train the model using NeMo RL, use the `examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml` config file. This file closely matches the experiment settings in the original DeepScaleR recipe. We then train with `examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml` and `examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml` for the second and third stages, respectively. 
```sh -uv run examples/run_grpo_math.py --config=examples/configs/grpo-deepscaler-1.5b-8K.yaml -uv run examples/run_grpo_math.py --config=examples/configs/grpo-deepscaler-1.5b-16K.yaml policy.model_name=/path/to/8K/checkpoint/hf -uv run examples/run_grpo_math.py --config=examples/configs/grpo-deepscaler-1.5b-24K.yaml policy.model_name=/path/to/16K/checkpoint/hf +uv run examples/run_grpo_math.py --config=examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml +uv run examples/run_grpo_math.py --config=examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml policy.model_name=/path/to/8K/checkpoint/hf +uv run examples/run_grpo_math.py --config=examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml policy.model_name=/path/to/16K/checkpoint/hf ``` At the end of each stage, you need to specify the Hugging Face checkpoint to continue training with. To get this checkpoint, we convert a model checkpoint to a Hugging Face checkpoint with the following command: @@ -35,11 +35,17 @@ Throughout training, the checkpoints of the model will be saved to the `results` uv run examples/run_eval.py \ generation.model_name=results/grpo-deepscaler-1.5b-8K/step_240/hf \ data.prompt_file=examples/prompts/cot.txt \ - generation.vllm_cfg.max_model_len=32768 + generation.vllm_cfg.max_model_len=32768 \ + generation.vllm_cfg.enforce_eager=True \ + generation.temperature=1.0 ``` Use `generation.model_name` to specify the path to the Hugging Face checkpoint. In addition, we use AIME24 as the validation dataset and calculate pass@1 on it throughout training. +> [!NOTE] +> AIME24 only has 30 examples so the accuracy can be very noisy. +> To reduce the variance consider running `run_eval.py` with `eval.num_tests_per_prompt=16`. + ## Evaluation Results Using the above instructions to train DeepSeek-R1-Distill-Qwen-1.5B on the DeepScaleR dataset, we can track the model's performance on the AIME24 benchmark throughout training. 
The following plot shows the evaluation metrics as training progresses: diff --git a/docs/guides/grpo-sliding-puzzle.md b/docs/guides/grpo-sliding-puzzle.md new file mode 100644 index 0000000000..35833aad34 --- /dev/null +++ b/docs/guides/grpo-sliding-puzzle.md @@ -0,0 +1,294 @@ +# Solve a Sliding Puzzle Using GRPO + +This guide explains how to use Nemo RL to train a model to solve the classic **nxn sliding puzzle** game through multi-turn reinforcement learning. This environment implements a classic **n×n sliding puzzle** where numbered tiles must be arranged in sequential order by sliding them into an empty space. + +The sliding puzzle task serves as a simple, yet effective example, to illustrate how multi-turn RL and tool-calling are implemented within Nemo RL. This example provides a minimal setup for understanding the core components of Group Relative Policy Optimization (GRPO) and sequential decision-making. + + +## Quick Start Guide + +### 1. Install and Set Up NeMo RL with Megatron Backend (Optional) + +To get started, clone and set up the NeMo RL repository by initializing submodules, installing CUDA dependencies, and configuring the environment with uv. Refer to [Prerequisites](https://github.com/NVIDIA-NeMo/RL/tree/main?tab=readme-ov-file#prerequisites) for detailed instructions on installation. + +### 2. Train a Model + +Train a model to solve the sliding puzzle using GRPO with the default 2×2 configuration. + +```bash +uv run python examples/run_grpo_sliding_puzzle.py +``` + +### 3. Customize Puzzle Configuration + +By default, this training script uses the configuration in [grpo_sliding_puzzle.yaml](../../examples/configs/grpo_sliding_puzzle.yaml). You can customize parameters with command-line overrides to experiment with different puzzle sizes or levels of difficulty. 
+```bash +# Train on a 3×3 puzzle with 10 random moves to scramble the board +uv run python examples/run_grpo_sliding_puzzle.py \ + env.sliding_puzzle_game.cfg.game_config.size=3 \ + env.sliding_puzzle_game.cfg.game_config.shuffle_moves=10 +``` + +### 4. Monitor Progress + +You can enable logging via Weights & Biases and TensorBoard to monitor training metrics such as rewards, success rate, and loss curves. + +```bash +# Enable logging (optional) +uv run examples/run_grpo_sliding_puzzle.py \ + --config examples/configs/grpo_sliding_puzzle.yaml \ + logger.wandb_enabled=true \ + logger.tensorboard_enabled=true +``` + +## Game Mechanics + +### Puzzle Structure + +The sliding puzzle consists of: +- **Grid**: An `n×n` grid with numbered tiles and one empty space +- **Tiles**: Numbered from `1` to `n²-1`, placed in random order +- **Empty Space**: Represented by `0`, typically starting at the bottom-right corner +- **Goal State**: Sequential arrangement `1, 2, 3, ..., n²-1` with `0` at bottom-right + +### Example Data Sample +``` +===== SLIDING PUZZLE ===== +Arrange the 3x3 grid by sliding tiles into the empty space. +- The goal is to arrange numbers from 1 to 8 in order +- Use 'up', 'down', 'left', 'right' to slide in that direction +- Use 'view' to see the current state of the board + +Current Board State: + + +---------+ +1 | 1 3 | +2 | 4 2 5 | +3 | 7 8 6 | + +---------+ + 1 2 3 + +Reach the goal state where numbers are ordered 1 through 8 with the empty space (0) at the bottom right. +Valid actions: 'up', 'down', 'left', 'right', or 'slide row col' (e.g., 'slide 1 2'). +After thinking, output your chosen action on a new line starting with '<action></action>' like this: +<action>your_action</action> +If you just want to see the board, output <action>view</action> +Think carefully step-by-step before acting. + +``` + +### Movement Rules + +1. **Valid Moves**: Only tiles adjacent to the empty space `0` can be moved. +2. 
**Movement Direction**: Tiles slide into the empty space, not the other way around. +3. **Grid Boundaries**: Moves that would go beyond the grid are invalid. +4. **Single Tile Movement**: Each action affects only one tile at a time. + +All actions must be wrapped in XML-style tags and follow one of the formats below: +```xml +<action>up</action> <!-- Slide a tile up into the empty space --> +<action>slide 2 1</action> <!-- Slide tile at row 2, column 1 --> +<action>view</action> <!-- View the current board state --> +``` + +## Data Generation + +### Configuration Parameters + +Sliding puzzle instances are generated using the following parameters, which can be customized via the configuration file: + +```yaml +env: + sliding_puzzle_game: + cfg: + game_config: + size: 5 # Size of the puzzle grid (e.g., 3x3, 4x4, 5x5) + shuffle_moves: 4 # Number of random moves to scramble the puzzle + max_moves: 40 # Maximum number of moves allowed per episode +``` +#### Description + +- **`size`**: Determines the dimensions of the puzzle board (`n×n`). +- **`shuffle_moves`**: Controls the initial difficulty by randomly moving tiles to scramble the puzzle. +- **`max_moves`**: Sets an upper limit on the number of actions the agent can take in one episode. + +Grids are generated with sizes ranging from 2 to game_config.size. Each grid starts with a solved state and is shuffled by moving random tiles to the empty space n times, where n is a random number between 1 and `shuffle_moves`. The grid is shuffled using only valid moves. +The `generate_puzzle_datum()` function in [run_grpo_sliding_puzzle.py](../../examples/run_grpo_sliding_puzzle.py) is responsible for generating the dataset. [sliding_puzzle.py](../../nemo_rl/environments/games/sliding_puzzle.py) contains the `SlidingPuzzleGameLogic` class, responsible for puzzle generation and initialization logic. The number of shuffle moves and size of the grid will control puzzle difficulty. 
+ +#### Generation Algorithm +The puzzle configuration is randomly generated by sampling the grid size and number of shuffling moves within the defined maximums: + +```python +def generate_random_config(max_config: dict[str, Any]) -> dict[str, Any]: + """Generate a random config for the sliding puzzle game.""" + shuffle_moves = random.randint(1, max_config.get("shuffle_moves")) + if shuffle_moves % 2 == 0: + shuffle_moves += 1 # Ensure odd number for proper scrambling + return { + "size": random.randint(2, max_config.get("size", 3)), + "shuffle_moves": shuffle_moves, + } + + game_config = generate_random_config(game_config) + initial_game_state = SlidingPuzzleGameLogic.generate(game_config) + initial_render = SlidingPuzzleGameLogic.render(initial_game_state) + welcome_message = SlidingPuzzleGameLogic.init(initial_game_state) + ``` + +### Dataset Size Calculation + +Dataset size is defined by parameters in grpo_sliding_puzzle.yaml: +``` +Training Size = num_prompts_per_step × num_generations_per_prompt × max_num_steps +Validation Size = max_val_samples +``` + +### Data Structure + +Each training sample is returned as a `DatumSpec` dictionary with the following structure: + +```python +datum: DatumSpec = { + "message_log": message_log, # Conversation history + "length": len(tokenized_prompt), # Token count + "extra_env_info": metadata, # Game state metadata + "loss_multiplier": 1.0, # Training weight + "idx": idx, # Sample index + "task_name": task_name, # Task identifier + "stop_strings": ["</action>"], # Termination tokens +} +``` + +## Environment Interface + +<!-- ### Architecture Flow + +``` +GRPO Training Pipeline: +run_grpo_sliding_puzzle.grpo_train → nemo_rl.experience.rollouts.run_multi_turn_rollouts → generate_response + calculate_reward → environments.games.sliding_puzzle.SlidingPuzzleEnv.step +``` --> + +### Core Classes + +The [sliding_puzzle.py](../../nemo_rl/environments/games/sliding_puzzle.py) defines the environment and the logic for interacting 
with the environment. The core classes used are outlined below: + +#### SlidingPuzzleEnv +The SlidingPuzzleEnv class serves as the main environment, implementing a Ray remote actor for distributed processing and using functions from both the SlidingPuzzleGameLogic and SlidingPuzzleRunner classes to interact with the environment. + +```python +@ray.remote +class SlidingPuzzleEnv(EnvironmentInterface): + def __init__(self, cfg: Optional[SlidingPuzzleConfig] = None): + """Initialize environment with configuration.""" + + def step( + self, + message_log_batch: list[LLMMessageLogType], + metadata_batch: list[SlidingPuzzleMetadata], + ) -> EnvironmentReturn: + """Process batch of interactions.""" +``` + +#### SlidingPuzzleGameLogic +The SlidingPuzzleGameLogic class defines the core game mechanics through static methods for puzzle operations and includes functionality for reward calculation. + +```python +class SlidingPuzzleGameLogic: + @staticmethod + def generate(config: dict[str, Any]) -> dict[str, Any]: + """Generate new puzzle with specified configuration.""" + + @staticmethod + def init(game_state: dict[str, Any]) -> str: + """Create welcome message with game rules.""" + + @staticmethod + def step(action: str, game_state: dict[str, Any]) -> tuple[str, float, bool, dict[str, Any]]: + """Execute action and return (response, reward, terminated, new_state).""" + + @staticmethod + def render(game_state: dict[str, Any]) -> str: + """Render current puzzle state as visual grid.""" +``` + +#### SlidingPuzzleRunner + +The SlidingPuzzleRunner class handles turn processing and action management. 
+ +```python +class SlidingPuzzleRunner: + def __init__(self): + """Initialize runner with no persistent state.""" + + def _parse_action(self, text: str) -> Optional[str]: + """Extract action from model response using XML tag parsing.""" + + def process_turn( + self, + message_log: LLMMessageLogType, + metadata: SlidingPuzzleMetadata, + ) -> tuple[dict[str, str], float, bool, Optional[list[str]], Optional[SlidingPuzzleMetadata]]: + """Process single turn and return (response_dict, reward, terminated, stop_strings, updated_metadata).""" +``` + +### Processing Pipeline + +The step function creates a processing pipeline where each class handles specific responsibilities: + +1. **Parse action** (`SlidingPuzzleRunner`): Extracts the action from the model response using XML tag parsing via the `process_turn` method. +2. **Validate Move** (`SlidingPuzzleGameLogic`): Checks if the action is valid for the current game state and then executes the move. +3. **Execute Action** (`SlidingPuzzleGameLogic`): Applies the move to the game state using the `SlidingPuzzleGameLogic.step` method. +4. **Calculate Reward** (`SlidingPuzzleGameLogic`): Assigns a reward based on progress toward solving the puzzle (step function). +5. **Return Results** (`SlidingPuzzleEnv`): Returns the updated interaction state as an `EnvironmentReturn` object. + +## Reward System + +### Reward Structure + +The environment uses a sparse reward scheme designed to encourage complete solution strategies, rather than incremental progress or reward hacking. + +| Condition | Reward | Termination | +|-----------|--------|-------------| +| Valid move (non-solving) | 0.0 | False | +| Invalid move | 0.0 | False | +| Puzzle solved | 1.0 | True | +| Max moves reached | 0.0 | True | +| Invalid action format | 0.0 | False | + +>Goal: The agent receives a reward only upon successfully solving the puzzle, promoting long-horizon planning. 
+ +### Reward Calculation Logic + +```python +def step(action: str, game_state: dict[str, Any]) -> tuple[str, float, bool, dict[str, Any]]: + """Process action and calculate reward.""" + reward = 0.0 + is_terminated = False + + if move_made: + # Check if puzzle is solved + if new_state["grid"] == new_state["solution"]: + reward = 1.0 + is_terminated = True + else: + reward = 0.0 # No reward for non-solving moves + + return response, reward, is_terminated, new_state +``` +## Results + +We fine-tuned [`Qwen/Qwen2.5-1.5B-Instruct`](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) on synthetic data for 120 steps using the following configuration settings: + +``` +game_config: + size: 5 # Size of the puzzle (e.g., 2 for 2x2, 3 for 3x3) + shuffle_moves: 10 # Number of random moves to shuffle the solved state +max_moves: 30 +``` + +The figure below displays training rewards vs. steps, along with validation accuracy. + +![Training Curve](../assets/train-reward-sliding-puzzle.png) + + +![Validation Accuracy](../assets/valid_acc-sliding-puzzle.png) diff --git a/docs/guides/grpo.md b/docs/guides/grpo.md old mode 100644 new mode 100755 index b137d45921..e396e66cd6 --- a/docs/guides/grpo.md +++ b/docs/guides/grpo.md @@ -1,6 +1,6 @@ # An in-depth Walkthrough of GRPO in NeMo RL -This guide details the Group Relative Policy Optimization(GRPO) implementation within NeMo RL. We'll walk through essential aspects including data handling, policy model training, fast generation, and the specifics of the GRPO loss function and its enhancements. +This guide details the Group Relative Policy Optimization (GRPO) implementation within NeMo RL. We'll walk through essential aspects including data handling, policy model training, fast generation, and the specifics of the GRPO loss function and its enhancements. ## Quickstart: Launch a GRPO Run @@ -28,7 +28,7 @@ In this guide, we'll walk through how we handle: We support training with multiple RL "Environments" at the same time. 
-An [Environment](../../nemo_rl/environments/interfaces.py) is an object that accepts a state/action history and returns an update state and rewards for the step. They run as Ray Remote Actors. Example [MathEnvironment](../../nemo_rl/environments/math_environment.py). +An [Environment](../../nemo_rl/environments/interfaces.py) is an object that accepts a state/action history and returns an updated state and rewards for the step. They run as Ray Remote Actors. Example [MathEnvironment](../../nemo_rl/environments/math_environment.py). To support this, we need to know: @@ -36,6 +36,22 @@ To support this, we need to know: * Which data should go to which environments * How to prepare the data from your dataset into a form we can use +#### Dataset + +By default, NeMo RL has support for [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py) and [DeepScaler](../../nemo_rl/data/datasets/response_datasets/deepscaler.py) datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. + +We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with jsonl-formatted response datasets for loading datasets from local path or HuggingFace. You can use `input_key`, `output_key` to specify which fields in your data correspond to the question and answer respectively. 
Here's an example configuration: +```yaml +data: + dataset_name: ResponseDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + val_data_path: <PathToValidationDataset> + input_key: <QuestionKey>, default is "input" + output_key: <AnswerKey>, default is "output" + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets +``` + #### Common Data Format We define a [DatumSpec](../../nemo_rl/data/interfaces.py) that holds all relevant information for each training example: @@ -55,7 +71,7 @@ class DatumSpec(TypedDict): We refer to each distinct environment your model aims to optimize against as a "task." For example, you might define tasks like "math" or "code." -For each task, you should provide a data processor that reads from your dataset and returns a [DatumSpec](../../nemo_rl/data/interfaces.py) +For each task, you should provide a data processor that reads from your dataset and returns a [DatumSpec](../../nemo_rl/data/interfaces.py). ```python def my_data_processor( @@ -67,9 +83,9 @@ def my_data_processor( ) -> DatumSpec: ``` -We have an example of this as `math_data_processor` in [processors.py](../../nemo_rl/data/processors.py) +We have an example of this as `math_data_processor` in [processors.py](../../nemo_rl/data/processors.py). 
-#### Putting it all together +#### Putting It All Together GRPO expects datasets to have the following form: @@ -81,7 +97,7 @@ Then, you can set the data up as follows: ```python base_dataset = load_dataset("json", data_files=data_config["dataset_name"])["train"] -tokenizer = AutoTokenizer.from_pretrained(policy_config["model_name"]) +tokenizer = get_tokenizer(tokenizer_config) task_data_processors = defaultdict(lambda: (math_task_spec, math_data_processor)) task_data_processors["math"] = (math_task_spec, math_data_processor) @@ -99,6 +115,12 @@ dataset = AllTaskProcessedDataset( Ensure you provide a mapping of tasks to their processors so the dataset knows which processor to use when handling samples. +## Environments + +GRPO supports various types of environments for different tasks, including **[Math](../../nemo_rl/environments/math_environment.py)**, **[Code](../../nemo_rl/environments/code_environment.py)**, and **[Reward Model](../../nemo_rl/environments/reward_model_environment.py)** environments. Each environment provides a standardized interface for reward computation and evaluation, enabling consistent training across diverse domains. + +For more information about environments, see the [Environments Guide](environments.md). + ## Policy Model We define a {py:class}`PolicyInterface]() <nemo_rl.models.interfaces>` that contains everything you need to train a Policy model. @@ -107,9 +129,9 @@ This Policy object holds a [RayWorkerGroup](../../nemo_rl/distributed/worker_gro ## Fast Generation -We support vLLM through the [VllmGeneration](../../nemo_rl/models/generation/vllm.py) class right now. +We support vLLM through the [VllmGeneration](../../nemo_rl/models/generation/vllm/vllm_generation.py) class right now. -The function [grpo_train](../../nemo_rl/algorithms/grpo.py) contains the core GRPO training loop. +The function, [grpo_train](../../nemo_rl/algorithms/grpo.py), contains the core GRPO training loop. 
## Performance Optimizations @@ -131,7 +153,7 @@ where: - $\beta$ is the KL penalty coefficient - $\pi_{\text{ref}}$ is the reference policy -Also supports "Dual-Clipping" from https://arxiv.org/pdf/1912.09729, which +It also supports "Dual-Clipping" from https://arxiv.org/pdf/1912.09729, which imposes an additional upper bound on the probability ratio when advantages are negative. This prevents excessive policy updates. $rA \ll 0$ -> $cA$(clipped). The loss function is modified to the following when A_t < 0: @@ -141,15 +163,14 @@ L(\theta) = E_t \Big[ \max \Big( \min \big(r_t(\theta) A_t, \text{clip}(r_t(\the $$ where: -- c is the dual-clip parameter (ratio_clip_c), which must be greater than 1 and is - usually set as 3 empirically -- $r_t(\theta)$ is the ratio $\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}$ that measures how much the policy has change +- c is the dual-clip parameter (ratio_clip_c), which must be greater than 1 and is usually set as 3 empirically +- $r_t(\theta)$ is the ratio $\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}$ that measures how much the policy has changed -### Improvements to the GRPO loss formulation for stability and accuracy +### Improvements to the GRPO Loss Formulation for Stability and Accuracy -#### On-Policy KL Approximation (use_on_policy_kl_approximation) +#### On-Policy KL Approximation -In practice, we calculate the KL divergence using the estimator from Schulman 2020 (http://joschu.net/blog/kl-approx.html), which is unbiased and guaranteed to be positive. +This feature is controlled by the parameter `use_on_policy_kl_approximation`. It enables the use of an estimator for KL divergence based on [Schulman (2020)](http://joschu.net/blog/kl-approx.html), which is both unbiased and guaranteed to be positive. 
$$ D_{\text{KL}} (\pi_\theta || \pi_\text{ref}) \approx E_{x \sim \pi_{\theta}} \Big[ \frac{\pi_\text{ref}(x)}{\pi_\theta(x)} - \log \frac{\pi_\text{ref}(x)}{\pi_\theta(x)} - 1 \Big] @@ -169,8 +190,8 @@ $$ To enable the on-policy KL approximation, set the config `use_on_policy_kl_approximation=True` in the `ClippedPGLossConfig`. By default, we set this config to False to align with standard GRPO. -#### Importance Sampling Correction (use_importance_sampling_correction) -The policy we use to draw samples, $\pi_{\theta_{\text{old}}}$, is used in both the inference framework and the training framework. To account for this distinction, we refer to the inference framework policy as $\pi_{\text{inference}}$ and the training framework policy as $\pi_{\text{training}}$. As noted in [Adding New Models](../adding-new-models.md#understand-discrepancies-between-backends), it is possible for the token probabilities from $\pi_{\text{training}}$ and $\pi_{\text{inference}}$ to have discrepancies (from numerics, precision differences, bugs, etc.), leading to off-policy samples. We can correct for this by introducing importance weights between $\pi_{\text{training}}$ and $\pi_{\text{inference}}$ to the first term of the loss function. +#### Importance Sampling Correction +This feature is controlled by the parameter `use_importance_sampling_correction`. It applies importance sampling to adjust for discrepancies between the behavior policy and the target policy, improving the accuracy of off-policy estimates. The policy we use to draw samples, $\pi_{\theta_{\text{old}}}$, is used in both the inference framework and the training framework. To account for this distinction, we refer to the inference framework policy as $\pi_{\text{inference}}$ and the training framework policy as $\pi_{\text{training}}$. 
As noted in [Adding New Models](../adding-new-models.md#understand-discrepancies-between-backends), it is possible for the token probabilities from $\pi_{\text{training}}$ and $\pi_{\text{inference}}$ to have discrepancies (from numerics, precision differences, bugs, etc.), leading to off-policy samples. We can correct for this by introducing importance weights between $\pi_{\text{training}}$ and $\pi_{\text{inference}}$ to the first term of the loss function. Let $f_\theta(x) = \min \Big(\frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}A_t, \text{clip} \big( \frac{\pi_\theta(x)}{\pi_{\theta_{\text{old}}}(x)}, 1 - \varepsilon, 1 + \varepsilon \big) A_t \Big)$ represent the first term of loss function. Then, @@ -187,11 +208,48 @@ By multiplying the first term of the loss function by the importance weights $\f To enable the importance sampling correction, set the config `use_importance_sampling_correction=True` in the `ClippedPGLossConfig`. By default, we set this config to False to align with standard GRPO. -## Metrics ({wandb, tb}_name) -We track a few metrics during training for scientific experimentation and to validate correctness as the run progresses. +#### Overlong Filtering + +This feature is controlled by the parameter `overlong_filtering`. It filters out sequences that exceed a predefined maximum length, helping maintain computational efficiency and model stability. When `overlong_filtering=True`, samples that reach `max_total_sequence_length` without producing an end-of-text token are excluded from loss computation. This reduces noise from penalizing generations that may be high-quality but exceed the sequence length limit. 
-### Multiplicative Token Probability Error (token_mult_prob_error) -This is equal to the 'Logprob consistency metric' defined in [Adding New Models](../adding-new-models.md#importance-of-log-probability-consistency-in-training-and-inference): +The implementation modifies the loss calculation as follows: + +For each sample $i$ in the batch: + +$$ +\text{truncated}_i = \begin{cases} +1 & \text{if sample } i \text{ reached max length without EOS} \\ +0 & \text{otherwise} +\end{cases} +$$ + +The sample mask becomes (let m_i denote the sample mask and ℓ_i denote the loss multiplier): + +$$ +m_i = \ell_i \cdot (1 - \text{truncated}_i) +$$ + +This results in the effective loss: + +$$ +L_{\text{effective}} = \sum_{i} m_i \cdot L_i +$$ + +where $L_i$ is the per-sample loss. Truncated samples contribute 0 to the gradient update while remaining in the batch for reward baseline calculations. + +To configure: +```yaml +grpo: + overlong_filtering: false # default +``` + +Set `overlong_filtering` to true when training on tasks where truncation at the maximum sequence length is expected, such as long-form reasoning or mathematical proofs. + +## Metrics +This feature is controlled by the parameters `wandb_name` and `tb_name`. We track a few metrics during training for scientific experimentation and to validate correctness as the run progresses. + +### Multiplicative Token Probability Error +This feature is controlled by the parameter `token_mult_prob_error`. It measures the error introduced when token probabilities are scaled multiplicatively, which can affect model calibration and output consistency. 
This is equal to the 'Logprob consistency metric' defined in [Adding New Models](../adding-new-models.md#importance-of-log-probability-consistency-in-training-and-inference): $$ \text{token-mult-prob-error} = \frac{1}{n}\sum_{i=1}^{n\text{(tokens)}}\exp\left(\left\|\text{log-train-fwk}_i - \text{logprobs-inference-fwk}_i\right\|\right) @@ -199,24 +257,56 @@ $$ Intuitively, this measures the average multiplicative probability error for sampled tokens, where samples are drawn as $x \sim \pi_{\text{inference-framework}}$. The purpose of this is to highlight any obvious sampling errors or discrepencies between the inference backend and training framework. If it trends upward steeply over the course of training past $\sim 1-2\%$, there is usually a problem with how your weights are being updated. If very spiky, it can indicate a bug in the inference framework or buggy weight refitting. -### Sampling Importance Ratio (sampling_importance_ratio) -Not to be confused with the clipped importance ratio in PPO/GRPO, this is the importance ratio between $\pi_{\text{training}}$ and $\pi_{\text{inference}}$. 
+### KL Divergence Error +This feature is controlled by the following metrics: +* `gen_kl_error`: $D_{\text{KL}}(P_{gen} || P_{policy})$ + - the generation distribution as ground truth +* `policy_kl_error`: $D_{\text{KL}}(P_{policy} || P_{gen})$ + - the policy (training) distribution as ground truth +* `js_divergence_error` or (Jensen–Shannon divergence): $(D_{\text{KL}}(P_{policy} || P_{m}) + D_{\text{KL}}(P_{gen} || P_{m})) / 2$, where $P_{m} = (P_{policy} + P_{gen}) / 2$ + - uses the mean mixture distribution as reference + +According to the paper [When Speed Kills Stability: Demystifying RL Collapse from the Training-Inference Mismatch](https://yingru.notion.site/When-Speed-Kills-Stability-Demystifying-RL-Collapse-from-the-Training-Inference-Mismatch-271211a558b7808d8b12d403fd15edda), `gen_kl_error` was introduced (referred to as `vllm-kl` in the paper) as the key metric to measure mismatch between policy and generation distribution. Empirically, the mismatch is approximately 1e-3, and the divergence is bigger for low-probability tokens as predicted by the generation inference engine (like vLLM). + +The three divergence metrics provide complementary perspectives on distribution mismatch. For example: + +We observed a case where vLLM assigned a disproportionately high probability to a single rare token, causing significant logprob error spikes (especially in MoE architectures): + +```text +# extreme example +1. 
Position 4559: 'au' (ID: 1786) + logp_gen (from vLLM): -5.xxx + logp_policy (from Mcore): -15.xxx +``` +Assuming other tokens have near-zero divergence, this single token's metrics with `kl_type=k3` are: + +* `gen_kl_error`: exp(-15 + 5) - (-15 + 5) - 1 ≈ 9 (moderate mismatch) +* `policy_kl_error`: exp(-5 + 15) - (-5 + 15) - 1 ≈ 22,015 (severe mismatch dominating the metric) +* `js_divergence_error`: ≈ 9, close to `gen_kl_error` since the mixture distribution (~-5.69) is dominated by the higher-probability value (logp_gen in this example) + +Ideally, all KL divergence metrics should be close to 0, with values below 1e-3 considered acceptable. Investigate any metric that shows spikes above this threshold. + +### Sampling Importance Ratio +This feature is controlled by the parameter `sampling_importance_ratio`. It adjusts the weighting of samples based on the ratio between the target policy and the behavior policy, helping to correct for distributional shift in off-policy learning. Not to be confused with the clipped importance ratio in PPO/GRPO, this is the importance ratio between $\pi_{\text{training}}$ and $\pi_{\text{inference}}$. This is simply $\frac{1}{|T|}\sum_{t \in \text{tokens}}\text{exp}(\text{log}(\pi_{\text{training}}(t)) - \text{log}(\pi_{\text{inference}}(t)))$ -Similar to [Multiplicative Token Probability Error](#multiplicative-token-probability-error-token_mult_prob_error), this is a measure of how far off your inference backend is from your training framework. However, this metric is meant to find the bias in that error instead of loosely the variance as it does not take the absolute value of the error. With some noise, this should hover around 1. +Similar to [Multiplicative Token Probability Error](#multiplicative-token-probability-error), this is a measure of how far off your inference backend is from your training framework. 
However, this metric is meant to find the bias in that error instead of loosely the variance as it does not take the absolute value of the error. With some noise, this should hover around 1. -This metric is always calculated and the per-token version (without the mean) is used in the loss function when [Importance Sampling Correction](#importance-sampling-correction-use_importance_sampling_correction) is enabled. +This metric is always calculated and the per-token version (without the mean) is used in the loss function when [Importance Sampling Correction](#importance-sampling-correction) is enabled. -### Entropy (approx_entropy) -We roughly approximate the entropy of the LLM's distribution throughout training by calculating: +### Entropy +This feature is controlled by the parameter `approx_entropy`. It estimates the entropy of the policy distribution, which can be used to encourage exploration and prevent premature convergence during training. We roughly approximate the entropy of the LLM's distribution throughout training by calculating: $$ E_{s \sim \pi_{\text{inference}}(x)}[-\frac{\pi_{\text{training}}(x)}{\pi_{\text{inference}}(x)}log(\pi_{\text{training}}(x))] $$ -using the rollouts in each training global batch as Monte-Carlo samples. The ratio of $\pi$ is in the formula to importance-correct for the mismatch between the policy over the course of training in a singular GRPO step and the inference framework. -We use this to track if our models are entropy-collapsing too quickly during training (as is quite common). This is a pretty rough monte-carlo approximation, so we wouldn't recommend using this directly for an entropy bonus or otherwise backpropagating through this. You can take a look at NeMo-Aligner's [implementation](https://github.com/NVIDIA/NeMo-Aligner/blob/main/nemo_aligner/utils/distributed.py#L351) of a full entropy calculation if you're interested (WIP efficient calculation in NeMo-RL). 
+This expectation is estimated using the rollouts in each global training batch as Monte Carlo samples. The ratio of $\pi$ values in the formula serves to importance-correct for the mismatch between the training policy during a single GRPO step and the inference-time policy used to sample states. + +We use this to track if our models are entropy-collapsing too quickly during training (as is quite common). This is a pretty rough Monte Carlo approximation, so we wouldn't recommend using this directly for an entropy bonus or otherwise backpropagating through this. You can take a look at NeMo Aligner's [implementation](https://github.com/NVIDIA/NeMo-Aligner/blob/main/nemo_aligner/utils/distributed.py#L351) of a full entropy calculation if you're interested (WIP efficient calculation in NeMo RL). + + ## Evaluate the Trained Model diff --git a/docs/guides/rm.md b/docs/guides/rm.md new file mode 100644 index 0000000000..f5deb05f0d --- /dev/null +++ b/docs/guides/rm.md @@ -0,0 +1,171 @@ +# Reward Model Training in NeMo RL + +This document explains how to train reward models (RM) within NeMo RL. Currently, only Bradley-Terry reward models are supported on the DTensor backend. Megatron backend support is tracked [here](https://github.com/NVIDIA-NeMo/RL/issues/720). + +## Launch a Training Job + +The script, [examples/run_rm.py](../../examples/run_rm.py), is used to train a Bradley-Terry reward model. This script can be launched either locally or via Slurm. For details on how to set up Ray and launch a job using Slurm, refer to the [cluster documentation](../cluster.md). + +Be sure to launch the job using `uv`. 
The command to launch a training job is as follows: + +```bash +uv run examples/run_rm.py + +# Can also add overrides on CLI, like changing the config or changing the model +uv run examples/run_rm.py --config examples/configs/rm.yaml policy.model_name=Qwen/Qwen2.5-1.5B +``` + +The default YAML config shares the same base template as the SFT config but includes a new `reward_model_cfg` section with `enabled: true` to load the model as a Reward Model. You can find an example RM config file at [examples/configs/rm.yaml](../../examples/configs/rm.yaml). + +**Reminder**: Set your `HF_HOME`, `WANDB_API_KEY`, and `HF_DATASETS_CACHE` (if needed). Make sure to log in using `huggingface-cli` if you're working with Llama models. + +## Datasets + +Each RM dataset class is expected to have the following attributes: +1. `formatted_ds`: The dictionary of formatted datasets, where each dataset should be formatted like +```json +{ + "context": [], // list of dicts - The prompt message (including previous turns, if any) + "completions": [ // list of dicts — The list of completions + { + "rank": 0, // int — The rank of the completion (lower rank is preferred) + "completion": [] // list of dicts — The completion message(s) + }, + { + "rank": 1, // int — The rank of the completion (lower rank is preferred) + "completion": [] // list of dicts — The completion message(s) + } + ] +} +``` +2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. + +Currently, RM training supports only two completions (where the lowest rank is preferred and the highest one is rejected), with each completion being a single response. For example: +```json +{ + "context": [ + { + "role": "user", + "content": "What's the capital of France?" + }, + { + "role": "assistant", + "content": "The capital of France is Paris." + }, + { + "role": "user", + "content": "Thanks! And what's the capital of Germany?" 
+ } + ], + "completions": [ + { + "rank": 0, + "completion": [ + { + "role": "assistant", + "content": "The capital of Germany is Berlin." + } + ] + }, + { + "rank": 1, + "completion": [ + { + "role": "assistant", + "content": "The capital of Germany is Munich." + } + ] + } + ] +} +``` + +By default, NeMo RL has support for [HelpSteer3](../../nemo_rl/data/datasets/preference_datasets/helpsteer3.py) and [Tulu3Preference](../../nemo_rl/data/datasets/preference_datasets/tulu3.py) datasets. Both of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. + +We provide a [PreferenceDataset](../../nemo_rl/data/datasets/preference_datasets/preference_dataset.py) class that is compatible with jsonl-formatted preference datasets for loading datasets from local path or HuggingFace.. You can modify your config as follows to use such a custom preference dataset: +```yaml +data: + dataset_name: PreferenceDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # multiple validation sets is supported + val_data_paths: + <NameOfValidationDataset>: <PathToValidationDataset1> + <NameOfValidationDataset2>: <PathToValidationDataset2> + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets +``` + +We also provide a [BinaryPreferenceDataset](../../nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py) class, which is a simplified version of PreferenceDataset for pairwise ranked preference with single turn completions. You can use `prompt_key`, `chosen_key` and `rejected_key` to specify which fields in your data correspond to the question, chosen answer and rejected answer respectively. 
Here's an example configuration: +```yaml +data: + dataset_name: BinaryPreferenceDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + val_data_path: <PathToValidationDataset> + prompt_key: <PromptKey>, default is "prompt" + chosen_key: <ChosenKey>, default is "chosen" + rejected_key: <RejectedKey>, default is "rejected" + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets +``` + +Please note: +- If you are using a logger, the prefix used for each validation set will be `validation-<NameOfValidationDataset>`. The total validation time, summed across all validation sets, is reported under `timing/validation/total_validation_time`. +- If you are doing checkpointing, the `metric_name` value in your `checkpointing` config should reflect the metric and validation set to be tracked. For example, `validation-<NameOfValidationDataset1>_loss`. + +## Using Reward Models as Environments + +Trained reward models can be used as environments in GRPO training for reinforcement learning from human feedback (RLHF). This allows you to use your trained reward model to provide rewards during policy optimization. 
+ +### Reward Model Environment + +The Reward Model Environment provides a standardized interface for using trained reward models in RL training: + +```python +from nemo_rl.environments.reward_model_environment import RewardModelEnvironment + +env_config = { + "enabled": True, + "model_name": "path/to/your/trained/reward/model", + "tokenizer": {"name": "path/to/your/trained/reward/model"}, + "precision": "bfloat16", + "batch_size": 32, + "resources": {"gpus_per_node": 1, "num_nodes": 1}, + "reward_model_cfg": { + "enabled": True, + "reward_model_type": "bradley_terry", + }, +} + +reward_env = RewardModelEnvironment.remote(env_config) +``` + +### Integration with GRPO + +To use your trained reward model with GRPO, you can use the [examples/run_grpo_rm.py](../../examples/run_grpo_rm.py) script: + +```bash +# Run GRPO training with your trained reward model +uv run examples/run_grpo_rm.py --config examples/configs/grpo_rm_1B.yaml +``` + +### Configuration + +In your GRPO configuration, specify the reward model environment: + +```yaml +env: + reward_model: + enabled: true + model_name: "path/to/your/trained/reward/model" + tokenizer: + name: "path/to/your/trained/reward/model" + precision: "bfloat16" + batch_size: 32 + resources: + gpus_per_node: 1 + num_nodes: 1 + reward_model_cfg: + enabled: true + reward_model_type: "bradley_terry" +``` + diff --git a/docs/guides/sft.md b/docs/guides/sft.md index e272c6d9d6..982052f074 100644 --- a/docs/guides/sft.md +++ b/docs/guides/sft.md @@ -34,7 +34,7 @@ SFT datasets in NeMo RL are encapsulated using classes. Each SFT data class is e 1. `formatted_ds`: The dictionary of formatted datasets. This dictionary should contain `train` and `validation` splits, and each split should conform to the format described below. 2. `task_spec`: The `TaskDataSpec` for this dataset. This should specify the name you choose for this dataset. -SFT datasets are expected to follow the Hugging Face chat format. 
Refer to the [chat dataset document](../design-docs/chat-datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [data/hf_datasets/squad.py](../../nemo_rl/data/hf_datasets/squad.py) has an example: +SFT datasets are expected to follow the HuggingFace chat format. Refer to the [chat dataset document](../design-docs/chat-datasets.md) for details. If your data is not in the correct format, simply write a preprocessing script to convert the data into this format. [response_datasets/squad.py](../../nemo_rl/data/datasets/response_datasets/squad.py) has an example: ```python def format_squad(data): @@ -56,7 +56,7 @@ def format_squad(data): } ``` -NeMo RL SFT uses Hugging Face chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example): +NeMo RL SFT uses HuggingFace chat templates to format the individual examples. Three types of chat templates are supported, which can be configured via `tokenizer.chat_template` in your yaml config (see [sft.yaml](../../examples/configs/sft.yaml) for an example): 1. Apply the tokenizer's default chat template. To use the tokenizer's default, either omit `tokenizer.chat_template` from the config altogether, or set `tokenizer.chat_template="default"`. 2. Use a "passthrough" template which simply concatenates all messages. This is desirable if the chat template has been applied to your dataset as an offline preprocessing step. In this case, you should set `tokenizer.chat_template` to None as follows: @@ -71,8 +71,89 @@ NeMo RL SFT uses Hugging Face chat templates to format the individual examples. 
custom_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer: '}}{%- elif message['role'] == 'assistant' %}{{message['content'].strip()}}{%- endif %}{% endfor %}" ``` +By default, NeMo RL has support for [OpenAssistant](../../nemo_rl/data/datasets/response_datasets/oasst.py), [Squad](../../nemo_rl/data/datasets/response_datasets/squad.py) and [OpenMathInstruct-2](../../nemo_rl/data/datasets/response_datasets/openmathinstruct2.py) datasets. All of these datasets are downloaded from HuggingFace and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. + +We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with jsonl-formatted response datasets for loading datasets from local path or HuggingFace. You can use `input_key`, `output_key` to specify which fields in your data correspond to the question and answer respectively. Here's an example configuration: +```yaml +data: + dataset_name: ResponseDataset + train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + val_data_path: <PathToValidationDataset> + input_key: <QuestionKey>, default is "input" + output_key: <AnswerKey>, default is "output" + train_split: <TrainSplit>, default is None # used for HuggingFace datasets + val_split: <ValSplit>, default is None # used for HuggingFace datasets +``` + +### OpenAI Format Datasets (with Tool Calling Support) + +NeMo RL also supports datasets in the OpenAI conversation format, which is commonly used for chat models and function calling. This format is particularly useful for training models with tool-use capabilities. 
+ +#### Basic Usage + +To use an OpenAI format dataset, configure your YAML as follows: + +```yaml +data: + dataset_name: openai_format + train_data_path: "/path/to/train.jsonl" # Path to training data + val_data_path: "/path/to/val.jsonl" # Path to validation data + chat_key: "messages" # Key for messages in the data (default: "messages") + system_key: null # Key for system message in the data (optional) + system_prompt: null # Default system prompt if not in data (optional) + tool_key: "tools" # Key for tools in the data (default: "tools") + use_preserving_dataset: false # Set to true for heterogeneous tool schemas (see below) +``` + +#### Data Format + +Your JSONL files should contain one JSON object per line with the following structure: + +```json +{ + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What's the weather in Paris?"}, + {"role": "assistant", "content": "I'll check the weather for you.", "tool_calls": [ + {"name": "get_weather", "arguments": {"city": "Paris", "unit": "celsius"}} + ]}, + {"role": "tool", "content": "22°C, sunny", "tool_call_id": "call_123"}, + {"role": "assistant", "content": "The weather in Paris is currently 22°C and sunny."} + ], + "tools": [ + { + "name": "get_weather", + "description": "Get current weather for a city", + "parameters": { + "city": {"type": "string", "description": "City name"}, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]} + } + } + ] +} +``` + +#### Tool Calling with Heterogeneous Schemas + +When your dataset contains tools with different argument structures (heterogeneous schemas), you should enable `use_preserving_dataset: true` to avoid data corruption: + +```yaml +data: + dataset_name: openai_format + ... + use_preserving_dataset: true # IMPORTANT: Enable this for tool calling datasets +``` + +**Why this matters:** Standard HuggingFace dataset loading enforces uniform schemas by adding `None` values for missing keys. 
For example: +- Tool A has arguments: `{"query": "search term"}` +- Tool B has arguments: `{"expression": "2+2", "precision": 2}` + +Without `use_preserving_dataset: true`, the loader would incorrectly add: +- Tool A becomes: `{"query": "search term", "expression": None, "precision": None}` +- Tool B becomes: `{"query": None, "expression": "2+2", "precision": 2}` + +This corrupts your training data and can lead to models generating invalid tool calls. The `PreservingDataset` mode maintains the exact structure of each tool call. -By default, NeMo RL has support for `Squad` and `OpenAssistant` datasets. Both of these datasets are downloaded from Hugging Face and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk. Adding a new dataset is a straightforward process. As long as your custom dataset has the `formatted_ds` and `task_spec` attributes described above, it can serve as a drop-in replacement for Squad and OpenAssistant. diff --git a/docs/guides/use-custom-vllm.md b/docs/guides/use-custom-vllm.md index ff196e0e53..61d686e659 100644 --- a/docs/guides/use-custom-vllm.md +++ b/docs/guides/use-custom-vllm.md @@ -7,61 +7,75 @@ This guide explains how to use your own version of vLLM while leveraging a pre-c Clone your vLLM fork and build it using the provided script. For example: ```sh -# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_BRANCH> <VLLM_PRECOMILED_WHEEL_COMMIT> -bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm a3319f4f04fbea7defe883e516df727711e516cd -``` -## Update `pyproject.toml` to Use Your Local vLLM -Edit your [pyproject.toml](https://github.com/NVIDIA-NeMo/RL/blob/main/pyproject.toml) so that the `vLLM` dependency points to your local clone instead of PyPI. 
- -**Change the pyproject.toml:** -```toml -# Add setuptools_scm -[project] -# ...<OMITTED> -dependencies = [ -# ...<OMITTED> - "setuptools_scm", # <-- Add -# ...<OMITTED> -] - -# Change the vLLM dependency: - -[project.optional-dependencies] -vllm = [ - #"vllm==0.9.0", # <-- BEFORE - "vllm", # <-- AFTER -] - -# ...<OMITTED> - -# Add a local source entry: -[tool.uv.sources] -# ...<OMITTED> -vllm = { path = "3rdparty/vllm", editable = true } # <-- ADD AN ENTRY - -# ...<OMITTED> - -# Update build isolation packages: -[tool.uv] -no-build-isolation-package = ["transformer-engine-torch", "transformer-engine"] # <-- BEFORE -no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "vllm"] # <-- AFTER -``` -## Re-Lock and Install Dependencies -Install any missing build dependencies and re-lock your environment: +# Usage: bash tools/build-custom-vllm.sh <GIT_URL> <GIT_REF> <VLLM_WHEEL_COMMIT> +bash tools/build-custom-vllm.sh https://github.com/terrykong/vllm.git terryk/demo-custom-vllm d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70 -```sh -uv pip install setuptools_scm # vLLM doesn't declare this build dependency so we install it manually -uv lock +# [INFO] pyproject.toml updated. NeMo RL is now configured to use the local vLLM at 3rdparty/vllm. +# [INFO] Verify this new vllm version by running: +# +# VLLM_PRECOMPILED_WHEEL_LOCATION=http://.....whl \ +# uv run --extra vllm vllm serve Qwen/Qwen3-0.6B +# +# [INFO] For more information on this custom install, visit https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md +# [IMPORTANT] Remember to set the shell variable 'VLLM_PRECOMPILED_WHEEL_LOCATION' when running NeMo RL apps with this custom vLLM to avoid re-compiling. ``` -## Verify Your Custom vLLM + +This script does the following: +1. Clones the `vllm` you specify at a particular branch. +2. Builds `vllm`. +3. Updates NeMo RL's pyproject.toml to work with this `vllm`. +4. Updates `uv.lock`. 
+ +Make sure to add the updated `pyproject.toml` and `uv.lock` to version control so that your branch can be reproduced by others. + +## Verify Your Custom vLLM in Isolation Test your setup to ensure your custom vLLM is being used: ```sh -uv run --extra vllm python -c 'import vllm; print("Successfully imported vLLM")' +uv run --extra vllm python -c 'import vllm; print(f"Successfully imported vLLM version: {vllm.__version__}")' # Uninstalled 1 package in 1ms # Installed 1 package in 2ms # Hi! If you see this, you're using a custom version of vLLM for the purposes of this tutorial # INFO 06-18 09:22:44 [__init__.py:244] Automatically detected platform cuda. -# Successfully imported vLLM +# Successfully imported vLLM version: 0.0.1.dev1+g69d5add74.d20250910 ``` If you don't see the log message `Hi! If you see this...`, it's because this message is unique to the tutorial's specific `vLLM` fork. It was added in [this commit](https://github.com/terrykong/vllm/commit/69d5add744e51b988e985736f35c162d3e87b683) and doesn't exist in the main `vLLM` project. + +## Running NeMo RL Apps with Custom vLLM + +To ensure the custom vLLM install is set up properly in NeMo RL applications, always run the following before anything: + +```sh +# Ensures vLLM uses the precompiled wheel and avoids recompiling C++ sources +export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/d8ee5a2ca4c73f2ce5fdc386ce5b4ef3b6e6ae70/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl +# Ensures worker venvs are rebuilt to use the custom vLLM. Otherwise it may use the cached version in cached venvs +export NRL_FORCE_REBUILD_VENVS=true +# This isn't necessary if you only do `uv run foobar.py`, but may be needed if you're switching between optional extras `uv run --extra vllm foobar.py`. If you are unsure if you need this, it's safer to include it.
+uv pip install setuptools_scm +``` + +Then run your application: +```sh +uv run examples/run_grpo_math.py +``` + +## Re-building the NeMo RL Docker Image + +Using a custom vllm may require you to rebuild the docker image. The two most common reasons are: + +1. The `ray` version was changed, so you **must** rebuild the image to allow `ray.sub` to start the ray cluster with the same version as the application. +2. Many dependencies changed and add a large overhead when `NRL_FORCE_REBUILD_VENVS=true` is set to rebuild venvs, so you wish to cache the dependencies in the image to avoid re-build/re-pulling wheels. + +For convenience, you can have the image build your custom vLLM by running the same script inside the Docker build. +Pass `--build-arg BUILD_CUSTOM_VLLM=1` to enable this path; the build will create `3rdparty/vllm` and source `3rdparty/vllm/nemo-rl.env` automatically. + +```sh +docker buildx build \ + --build-arg BUILD_CUSTOM_VLLM=1 \ + --target release \ + --build-context nemo-rl=. \ + -f docker/Dockerfile \ + --tag <registry>/nemo-rl:latest \ + --push \ + . +``` \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 8eac11b146..c1b81b2b54 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,9 +1,187 @@ -```{include} ../README.md -:relative-docs: docs/ +# NeMo RL Documentation + +Welcome to the NeMo RL documentation. NeMo RL is an open-source post-training library developed by NVIDIA, designed to streamline and scale reinforcement learning methods for multimodal models (LLMs, VLMs, etc.). + +This documentation provides comprehensive guides, examples, and references to help you get started with NeMo RL and build powerful post-training pipelines for your models. + +## Getting Started + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} {octicon}`book` Overview +:link: about/overview +:link-type: doc + +Learn about NeMo RL's architecture, design philosophy, and key features that make it ideal for scalable reinforcement learning. 
+::: + +:::{grid-item-card} {octicon}`rocket` Quick Start +:link: about/quick-start +:link-type: doc + +Get up and running quickly with examples for both DTensor and Megatron Core training backends. +::: + +:::{grid-item-card} {octicon}`download` Installation +:link: about/installation +:link-type: doc + +Step-by-step instructions for installing NeMo RL, including prerequisites, system dependencies, and environment setup. +::: + +:::{grid-item-card} {octicon}`star` Features +:link: about/features +:link-type: doc + +Explore the current features and upcoming enhancements in NeMo RL, including distributed training, advanced parallelism, and more. +::: + +:::{grid-item-card} {octicon}`light-bulb` Tips and Tricks +:link: about/tips-and-tricks +:link-type: doc + +Troubleshooting common issues including missing submodules, Ray dashboard access, and debugging techniques. +::: + +:::: + +## Training and Generation + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} {octicon}`cpu` Training Backends +:link: about/backends +:link-type: doc + +Learn about DTensor and Megatron Core training backends, their capabilities, and how to choose the right one for your use case. +::: + +:::{grid-item-card} {octicon}`workflow` Algorithms +:link: about/algorithms/index +:link-type: doc + +Discover supported algorithms including GRPO, SFT, DPO, RM, and on-policy distillation with detailed guides and examples. +::: + +:::{grid-item-card} {octicon}`graph` Evaluation +:link: about/evaluation +:link-type: doc + +Learn how to evaluate your models using built-in evaluation datasets and custom evaluation pipelines. +::: + +:::{grid-item-card} {octicon}`server` Cluster Setup +:link: about/clusters +:link-type: doc + +Configure and deploy NeMo RL on multi-node Slurm or Kubernetes clusters for distributed computing. 
+::: + +:::: + +## Guides and Examples + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} {octicon}`mortar-board` GRPO DeepscaleR +:link: guides/grpo-deepscaler +:link-type: doc + +Reproduce DeepscaleR results with NeMo RL using GRPO on mathematical reasoning tasks. +::: + +:::{grid-item-card} {octicon}`number` SFT on OpenMathInstruct2 +:link: guides/sft-openmathinstruct2 +:link-type: doc + +Step-by-step guide for supervised fine-tuning on the OpenMathInstruct2 dataset. +::: + +:::{grid-item-card} {octicon}`stack` Environments +:link: guides/environments +:link-type: doc + +Create custom reward environments and integrate them with NeMo RL training pipelines. +::: + +:::{grid-item-card} {octicon}`plus-circle` Adding New Models +:link: adding-new-models +:link-type: doc + +Learn how to add support for new model architectures in NeMo RL. +::: + +:::: + +## Advanced Topics + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} {octicon}`telescope` Design and Philosophy +:link: design-docs/design-and-philosophy +:link-type: doc + +Deep dive into NeMo RL's architecture, APIs, and design decisions for scalable RL. +::: + +:::{grid-item-card} {octicon}`bug` Debugging +:link: debugging +:link-type: doc + +Tools and techniques for debugging distributed Ray applications and RL training runs. +::: + +:::{grid-item-card} {octicon}`zap` FP8 Quantization +:link: fp8 +:link-type: doc + +Optimize large language models with FP8 quantization for faster training and inference. +::: + +:::{grid-item-card} {octicon}`container` Docker Containers +:link: docker +:link-type: doc + +Build and use Docker containers for reproducible NeMo RL environments. +::: + +:::: + +## API Reference + +::::{grid} 1 1 1 1 +:gutter: 3 + +:::{grid-item-card} {octicon}`code` Complete API Documentation +:link: apidocs/index +:link-type: doc + +Comprehensive reference for all NeMo RL modules, classes, functions, and methods. Browse the complete Python API with detailed docstrings and usage examples. 
+::: + +:::: + +```{toctree} +:caption: About +:hidden: + +about/overview +about/features +about/backends +about/quick-start +about/installation +about/algorithms/index +about/evaluation +about/clusters +about/tips-and-tricks ``` ```{toctree} -:caption: 🖥️ Environment Start +:caption: Environment Start :hidden: local-workstation.md @@ -12,48 +190,52 @@ cluster.md ``` ```{toctree} -:caption: 🚀 E2E Examples +:caption: E2E Examples :hidden: -guides/grpo-deepscaler.md guides/sft-openmathinstruct2.md ``` ```{toctree} -:caption: 📚 Guides +:caption: Guides :hidden: adding-new-models.md guides/sft.md guides/dpo.md +guides/dapo.md guides/grpo.md guides/grpo-deepscaler.md +guides/grpo-sliding-puzzle.md +guides/rm.md +guides/environments.md guides/eval.md guides/deepseek.md model-quirks.md +guides/async-grpo.md ``` ```{toctree} -:caption: 🐳 Containers +:caption: Containers :hidden: docker.md ``` ```{toctree} -:caption: 🛠️ Development +:caption: Development :hidden: testing.md documentation.md debugging.md nsys-profiling.md +fp8.md guides/use-custom-vllm.md -apidocs/index.rst ``` ```{toctree} -:caption: 📐 Design Docs +:caption: Design Docs :hidden: design-docs/design-and-philosophy.md @@ -67,4 +249,12 @@ design-docs/loss-functions.md design-docs/fsdp2-parallel-plan.md design-docs/training-backends.md design-docs/sequence-packing-and-dynamic-batching.md +design-docs/env-vars.md +``` + +```{toctree} +:caption: API Reference +:hidden: + +apidocs/index ``` diff --git a/docs/model-quirks.md b/docs/model-quirks.md index 6ba7f12f55..52869bf04d 100644 --- a/docs/model-quirks.md +++ b/docs/model-quirks.md @@ -4,14 +4,6 @@ This document outlines special cases and model-specific behaviors that require c ## Gemma-3 -### Tied Weights - -Weight tying between the embedding layer (`model.embed_tokens`) and output layer (`lm_head`) is currently not respected when using the DTensor policy when TP > 1 (See [this issue](https://github.com/NVIDIA-NeMo/RL/issues/227)). 
To avoid errors when training these models, we only allow training models with tied weights using the DTensor policy with TP=1. For Llama-3 and Qwen2.5 models, weight-tying is only enabled for the smaller models (< 2B), which can typically be trained without tensor parallelism. For Gemma-3, all model sizes have weight-tying enabled, including the larger models which require tensor parallelism. To support training of these models, we specially handle the Gemma-3 models by allowing training using the DTensor policy with TP > 1. - -**Special Handling:** -- We skip the tied weights check for all Gemma-3 models when using the DTensor policy, allowing training using TP > 1. -- We exclude `model.embed_tokens` and `lm_head` from the DTensor tensor parallel plan to maintain weight tying correctly. - ### vLLM Initialization Gemma-3 models have a specific issue with vLLM dummy weight initialization due to a vLLM bug where [a `normalizer` buffer is created](https://github.com/vllm-project/vllm/blob/964472b9667508b1d4a7ed92068ff81740ae0036/vllm/model_executor/models/gemma3.py#L372) that is not present in the Hugging Face model. This causes the `normalizer` buffer to be set to dummy weights at initialization and then never updated with the correct values during model refit. As a workaround for this issue, we do not use dummy weight initialization for vLLM with Gemma-3 models and instead use the `load_format="auto"` setting to load the full weights at initialization. @@ -39,6 +31,13 @@ Whether model level support CP only depends on arguments passed to `torch.nn.fun - It's a known issue that context parallel can't be used together with sequence parallel. Refer to [here](https://github.com/NVIDIA-NeMo/RL/issues/659) for more details. +## DeepScaleR Recipe Convergence Issues + +The DeepScaleR recipe (e.g., `examples/configs/grpo-deepscaler-1.5b-8K.yaml`) has been found to experience convergence issues when CUDA graphs are enabled in vLLM. 
+ +**Special Handling:** +- CUDA graphs must be disabled by setting `enforce_eager: True` in the vLLM configuration (https://github.com/NVIDIA-NeMo/RL/pull/857 forces eager execution by default). + ## vLLM Async Rollout Timeout vLLM async generation has a configurable timeout for waiting for individual sample results. This is particularly important for longer sequences on large models. diff --git a/docs/nsys-profiling.md b/docs/nsys-profiling.md index 3c5ccd0c3a..5e97390050 100644 --- a/docs/nsys-profiling.md +++ b/docs/nsys-profiling.md @@ -6,7 +6,7 @@ NeMo RL supports Nsight profiling for Ray workers through environment variable p ## Prerequisites -* Install NVIDIA Nsight Systems (`nsys`) on the compute nodes where workers will run. For Ubuntu installation instructions, see the [NVIDIA Nsight Systems Installation Guide](https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html#:~:text=Ubuntu%20(minimal%20setup%20for%20containers)). +* Install NVIDIA Nsight Systems (`nsys`) on the compute nodes where workers will run. For Ubuntu installation instructions, see the [NVIDIA Nsight Systems Installation Guide](https://docs.nvidia.com/nsight-systems/InstallationGuide/index.html#package-manager-installation). **Note: If you're using NeMo RL containers, `nsys` is already installed.** @@ -63,9 +63,8 @@ NRL_NSYS_PROFILE_STEP_RANGE=3:10 NRL_NSYS_WORKER_PATTERNS="dtensor_policy_worker ### Profile Megatron Workers -:::{important} -To profile a Megatron worker, you should set `LD_LIBRARY_PATH` as follows, otherwise you will get errors when loading `libtransformer_engine.so`. -::: +> [!IMPORTANT] +> To profile a Megatron worker, you should set `LD_LIBRARY_PATH` as follows, otherwise you will get errors when loading `libtransformer_engine.so`.
```bash LD_LIBRARY_PATH="/usr/local/cuda/targets/x86_64-linux/lib:/usr/local/cuda/lib64:/usr/local/cuda/lib:/usr/local/nvidia/lib64:/usr/local/nvidia/lib:/usr/lib/x86_64-linux-gnu" \ @@ -85,16 +84,25 @@ When profiling is enabled, it generates the following logs and files: ``` dtensor_policy_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep vllm_generation_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep + worker_process_<PID>.nsys-rep ``` +If you are not using model parallelism in vLLM, you should directly refer to `vllm_generation_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep` for Nsight reports; if you are using model parallelism, the `vllm_generation_worker_<NRL_NSYS_PROFILE_STEP_RANGE>_<PID>.nsys-rep` will be empty, and the `worker_process_<PID>.nsys-rep` files are Nsight profiles from vLLM's Ray distributed executors (refer to https://github.com/vllm-project/vllm/blob/7e3a8dc90670fd312ce1e0d4eba9bf11c571e3ad/vllm/executor/ray_distributed_executor.py#L136 for more information). -3. **File Location**: Profile files are saved in `/tmp/ray/session*/logs/nsight/` directory on each worker node. +3. **File Location**: Profile files are saved in `/tmp/ray/session*/logs/nsight/` directory on each worker node. Ensure you check both `ls /tmp/ray/session_[0-9]*/logs/nsight` and `ls /tmp/ray/session_latest/logs/nsight` for the profiles, since the "latest" pointer may be stale. -**Note for SLURM users with `ray.sub`**: When using `ray.sub` on SLURM, set `RAY_LOG_SYNC_FREQUENCY=$NUM_SEC` (e.g., `RAY_LOG_SYNC_FREQUENCY=30`) to ensure that the nsight profile files get copied from the container's ephemeral filesystem (`/tmp/ray`) to the persistent `$SLURM_JOB_ID-logs/ray` directory. +**Note for SLURM users with `ray.sub`**: When using `ray.sub` on SLURM, set `RAY_LOG_SYNC_FREQUENCY=$NUM_SEC` (e.g., `RAY_LOG_SYNC_FREQUENCY=30`) to ensure that the nsight profile files get copied from the container's ephemeral filesystem (`/tmp/ray`) to the persistent directory.
The head node's files will be synced to `$SLURM_JOB_ID-logs/ray`, and other nodes' files will be synced to `$SLURM_JOB_ID-logs/ray/$node_ip/` where `$node_ip` is the IP address of the node. ## Analyze Profile Files To analyze the generated profile files, load the `.nsys-rep` files into the NVIDIA Nsight Systems desktop application, which you can download from the [NVIDIA Nsight Systems Get Started page](https://developer.nvidia.com/nsight-systems/get-started). +### How to Analyze the End-to-End RL Loop All at Once + +Nsight Systems supports [multi-report view](https://docs.nvidia.com/nsight-systems/UserGuide/index.html#viewing-multiple-reports-in-the-same-timeline) functionality. If you open the profiles from different workers (e.g., `*policy_worker*.nsys-rep` and `*generation_worker*.nsys-rep`) in a single multi-report view, you can analyze the behavior of the end-to-end RL loop on the same timeline. + + +![Nsys multi report view](./assets/nsys-multi-report-view.png) + ## How We Patched Nsight Support in Ray Ray's Nsight profiling support had a bug where it hardcoded the Python executable path instead of using the actual Python executable from the runtime environment. This caused issues when using virtual environments or custom Python installations (`py_executables`). diff --git a/docs/pyproject.toml b/docs/pyproject.toml new file mode 100644 index 0000000000..933a68bf80 --- /dev/null +++ b/docs/pyproject.toml @@ -0,0 +1,22 @@ +# uv.docs.toml +[project] +name = "nemo-rl-docs-env" +version = "0.0.1" +requires-python = ">=3.12" +dependencies = [ + "sphinx", + "sphinx-autobuild", + "sphinx-autodoc2", + "sphinx-copybutton", + "sphinx-design", + "myst_parser", + "nvidia-sphinx-theme", + "gitpython>=3.1.45", + "python-dotenv", + "sphinxcontrib-mermaid", + "swagger-plugin-for-sphinx", +] + +[tool.uv] +# DO NOT create or modify the main project lockfile.
+no-lock = true diff --git a/docs/testing.md b/docs/testing.md index 8ce97346b9..24d277802e 100644 --- a/docs/testing.md +++ b/docs/testing.md @@ -4,9 +4,15 @@ This guide outlines how to test NeMo RL using unit and functional tests, detaili ## Unit Tests -:::{important} -Unit tests require 2 GPUs to test the full suite. -::: +> [!IMPORTANT] +> Unit tests require 2 GPUs to test the full suite. + +> [!TIP] +> Some unit tests require setting up test assets which you can download with +> ```sh +> uv run tests/unit/prepare_unit_test_assets.py +> ``` + ```sh # Run the unit tests using local GPUs @@ -24,6 +30,42 @@ uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only uv run --extra mcore --group test bash tests/run_unit.sh --mcore-only --hf-gated ``` +### Experimental: Faster local test iteration with pytest-testmon + +We support `pytest-testmon` to speed up local unit test runs by re-running only impacted tests. This works for both regular in-process code and out-of-process `@ray.remote` workers via a lightweight, test-only selection helper. + +Usage: +```sh +# Re-run only impacted unit tests +uv run --group test pytest --testmon tests/unit + +# You can also combine with markers/paths +uv run --group test pytest --hf-gated --testmon tests/unit/models/policy/test_dtensor_worker.py +``` + +What to expect: +- On the first run in a fresh workspace, testmon may run a broader set (or deselect everything if nothing was executed yet) to build its dependency cache. +- On subsequent runs, editing non-remote code narrows selection to only the tests that import/use those modules. +- Editing code inside `@ray.remote` actors also retriggers impacted tests. We maintain a static mapping from test modules to transitive `nemo_rl` modules they import and intersect that with changed files when `--testmon` is present. +- After a successful impacted run, a second `--testmon` invocation (with no further edits) will deselect all tests. 
+- Running `pytest` with `-k some_substring_in_test_name` will always run tests that match even if `--testmon` is passed. + +Limitations and tips: +- Selection is based on Python imports and file mtimes; non-Python assets (YAML/JSON/shell) are not tracked. When editing those, re-run target tests explicitly. +- The remote-aware selection uses a conservative static import map (no dynamic import resolution). If a test loads code dynamically that isn’t visible via imports, you may need to run it explicitly once to seed the map. +- The helper is test-only and does not alter library behavior. It activates automatically when you pass `--testmon`. + +Refreshing remote-selection artifacts +------------------------------------- +If you change test layout or significantly refactor imports, the remote-selection artifacts may become stale. +To rebuild them, delete the following files at the repo root and re-run with `--testmon` to seed again: + +```sh +# At the root of nemo-rl +rm .nrl_remote_map.json .nrl_remote_state.json +``` + + ### Run Unit Tests in a Hermetic Environment For environments lacking necessary dependencies (e.g., `gcc`, `nvcc`) @@ -87,28 +129,24 @@ Which would produce this file in `tests/unit/unit_results.json`: } ``` -:::{tip} -Past unit test results are logged in `tests/unit/unit_results/`. These are helpful to view trends over time and commits. 
- -Here's an example `jq` command to view trends: - -```sh -jq -r '[.start_time, .git_commit, .metrics["test_hf_ray_policy::test_lm_policy_generation"].avg_prob_mult_error] | @tsv' tests/unit/unit_results/* - -# Example output: -#2025-03-24 23:35:39 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 -#2025-03-24 23:36:37 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 -#2025-03-24 23:37:37 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 -#2025-03-24 23:38:14 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 -#2025-03-24 23:38:50 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 -``` -::: +> [!TIP] +> Past unit test results are logged in `tests/unit/unit_results/`. These are helpful to view trends over time and commits. +> +> ```sh +> jq -r '[.start_time, .git_commit, .metrics["test_hf_ray_policy::test_lm_policy_generation"].avg_prob_mult_error] | @tsv' tests/unit/unit_results/* +> +> # Example output: +> #2025-03-24 23:35:39 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 +> #2025-03-24 23:36:37 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 +> #2025-03-24 23:37:37 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 +> #2025-03-24 23:38:14 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 +> #2025-03-24 23:38:50 778d288bb5d2edfd3eec4d07bb7dffffad5ef21b 1.0000039339065552 +> ``` ## Functional Tests -:::{important} -Functional tests may require multiple GPUs to run. See each script to understand the requirements. -::: +> [!IMPORTANT] +> Functional tests may require multiple GPUs to run. See each script to understand the requirements. Functional tests are located under `tests/functional/`. 
diff --git a/docs/uv.lock b/docs/uv.lock new file mode 100644 index 0000000000..b1ce142581 --- /dev/null +++ b/docs/uv.lock @@ -0,0 +1,846 @@ +version = 1 +revision = 3 +requires-python = ">=3.12" + +[[package]] +name = "accessible-pygments" +version = "0.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/c1/bbac6a50d02774f91572938964c582fff4270eee73ab822a4aeea4d8b11b/accessible_pygments-0.0.5.tar.gz", hash = "sha256:40918d3e6a2b619ad424cb91e556bd3bd8865443d9f22f1dcdf79e33c8046872", size = 1377899, upload-time = "2024-05-10T11:23:10.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, +] + +[[package]] +name = "alabaster" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, +] + +[[package]] +name = "anyio" +version = "4.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c6/78/7d432127c41b50bccba979505f272c16cbcadcc33645d5fa3a738110ae75/anyio-4.11.0.tar.gz", hash = "sha256:82a8d0b81e318cc5ce71a5f1f8b5c4e63619620b63141ef8c995fa0db95a57c4", size = 219094, upload-time = "2025-09-23T09:19:12.58Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/15/b3/9b1a8074496371342ec1e796a96f99c82c945a339cd81a8e73de28b4cf9e/anyio-4.11.0-py3-none-any.whl", hash = "sha256:0287e96f4d26d4149305414d4e3bc32f0dcd0862365a4bddea19d7a1ec38c4fc", size = 109097, upload-time = "2025-09-23T09:19:10.601Z" }, +] + +[[package]] +name = "astroid" +version = "3.3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/74/dfb75f9ccd592bbedb175d4a32fc643cf569d7c218508bfbd6ea7ef9c091/astroid-3.3.11.tar.gz", hash = "sha256:1e5a5011af2920c7c67a53f65d536d65bfa7116feeaf2354d8b94f29573bb0ce", size = 400439, upload-time = "2025-07-13T18:04:23.177Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/0f/3b8fdc946b4d9cc8cc1e8af42c4e409468c84441b933d037e101b3d72d86/astroid-3.3.11-py3-none-any.whl", hash = "sha256:54c760ae8322ece1abd213057c4b5bba7c49818853fc901ef09719a60dbf9dec", size = 275612, upload-time = "2025-07-13T18:04:21.07Z" }, +] + +[[package]] +name = "babel" +version = "2.17.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/6b/d52e42361e1aa00709585ecc30b3f9684b3ab62530771402248b1b1d6240/babel-2.17.0.tar.gz", hash = "sha256:0c54cffb19f690cdcc52a3b50bcbf71e07a808d1c80d549f2459b9d2cf0afb9d", size = 9951852, upload-time = "2025-02-01T15:17:41.026Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b8/3fe70c75fe32afc4bb507f75563d39bc5642255d1d94f1f23604725780bf/babel-2.17.0-py3-none-any.whl", hash = "sha256:4d0b53093fdfb4b21c92b5213dba5a1b23885afa8383709427046b21c366e5f2", size = 10182537, upload-time = "2025-02-01T15:17:37.39Z" }, +] + +[[package]] +name = 
"beautifulsoup4" +version = "4.14.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "soupsieve" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/77/e9/df2358efd7659577435e2177bfa69cba6c33216681af51a707193dec162a/beautifulsoup4-4.14.2.tar.gz", hash = "sha256:2a98ab9f944a11acee9cc848508ec28d9228abfd522ef0fad6a02a72e0ded69e", size = 625822, upload-time = "2025-09-29T10:05:42.613Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/fe/3aed5d0be4d404d12d36ab97e2f1791424d9ca39c2f754a6285d59a3b01d/beautifulsoup4-4.14.2-py3-none-any.whl", hash = "sha256:5ef6fa3a8cbece8488d66985560f97ed091e22bbc4e9c2338508a9d5de6d4515", size = 106392, upload-time = "2025-09-29T10:05:43.771Z" }, +] + +[[package]] +name = "certifi" +version = "2025.11.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425, upload-time = "2025-10-14T04:40:53.353Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162, upload-time = "2025-10-14T04:40:54.558Z" }, + { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558, upload-time = "2025-10-14T04:40:55.677Z" }, + { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497, upload-time = "2025-10-14T04:40:57.217Z" }, + { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240, upload-time = "2025-10-14T04:40:58.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471, upload-time = "2025-10-14T04:40:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864, upload-time = "2025-10-14T04:41:00.623Z" }, + { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647, upload-time = "2025-10-14T04:41:01.754Z" }, + { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110, upload-time = "2025-10-14T04:41:03.231Z" }, + { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839, upload-time = "2025-10-14T04:41:04.715Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667, upload-time = "2025-10-14T04:41:05.827Z" }, 
+ { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535, upload-time = "2025-10-14T04:41:06.938Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816, upload-time = "2025-10-14T04:41:08.101Z" }, + { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694, upload-time = "2025-10-14T04:41:09.23Z" }, + { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131, upload-time = "2025-10-14T04:41:10.467Z" }, + { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390, upload-time = "2025-10-14T04:41:11.915Z" }, + { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091, upload-time = "2025-10-14T04:41:13.346Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936, upload-time = "2025-10-14T04:41:14.461Z" }, + { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180, upload-time = "2025-10-14T04:41:15.588Z" }, + { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346, upload-time = "2025-10-14T04:41:16.738Z" }, + { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874, upload-time = "2025-10-14T04:41:17.923Z" }, + { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076, upload-time = "2025-10-14T04:41:19.106Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601, upload-time = "2025-10-14T04:41:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376, upload-time = "2025-10-14T04:41:21.398Z" }, + { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825, upload-time = "2025-10-14T04:41:22.583Z" }, + { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583, upload-time = "2025-10-14T04:41:23.754Z" }, + { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366, upload-time = "2025-10-14T04:41:25.27Z" }, + { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300, upload-time = "2025-10-14T04:41:26.725Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465, upload-time = "2025-10-14T04:41:28.322Z" }, + { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404, upload-time = "2025-10-14T04:41:29.95Z" }, + { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092, upload-time = "2025-10-14T04:41:31.188Z" }, + { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408, upload-time = "2025-10-14T04:41:32.624Z" }, + { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746, upload-time = "2025-10-14T04:41:33.773Z" }, + { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889, upload-time = "2025-10-14T04:41:34.897Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641, upload-time = "2025-10-14T04:41:36.116Z" }, + { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779, upload-time = "2025-10-14T04:41:37.229Z" }, + { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035, upload-time = "2025-10-14T04:41:38.368Z" }, + { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542, upload-time = "2025-10-14T04:41:39.862Z" }, + { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524, upload-time = "2025-10-14T04:41:41.319Z" }, + { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = 
"sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395, upload-time = "2025-10-14T04:41:42.539Z" }, + { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680, upload-time = "2025-10-14T04:41:43.661Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045, upload-time = "2025-10-14T04:41:44.821Z" }, + { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687, upload-time = "2025-10-14T04:41:46.442Z" }, + { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014, upload-time = "2025-10-14T04:41:47.631Z" }, + { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044, upload-time = "2025-10-14T04:41:48.81Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", 
size = 99940, upload-time = "2025-10-14T04:41:49.946Z" }, + { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104, upload-time = "2025-10-14T04:41:51.051Z" }, + { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743, upload-time = "2025-10-14T04:41:52.122Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402, upload-time = "2025-10-14T04:42:31.76Z" }, +] + +[[package]] +name = "click" +version = "8.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/61/de6cd827efad202d7057d93e0fed9294b96952e188f7384832791c7b2254/click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4", size = 276943, upload-time = "2025-09-18T17:32:23.696Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/d3/9dcc0f5797f070ec8edf30fbadfb200e71d9db6b84d211e3b2085a7589a0/click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc", size = 107295, upload-time = "2025-09-18T17:32:22.42Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "docutils" +version = "0.21.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ae/ed/aefcc8cd0ba62a0560c3c18c33925362d46c6075480bfa4df87b28e169a9/docutils-0.21.2.tar.gz", hash = "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", size = 2204444, upload-time = "2024-04-23T18:57:18.24Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408, upload-time = "2024-04-23T18:57:14.835Z" }, +] + +[[package]] +name = "gitdb" +version = "4.0.12" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "smmap" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/94/63b0fc47eb32792c7ba1fe1b694daec9a63620db1e313033d18140c2320a/gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571", size = 394684, upload-time = "2025-01-02T07:20:46.413Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a0/61/5c78b91c3143ed5c14207f463aecfc8f9dbb5092fb2869baf37c273b2705/gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf", size = 62794, upload-time = 
"2025-01-02T07:20:43.624Z" }, +] + +[[package]] +name = "gitpython" +version = "3.1.45" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "gitdb" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9a/c8/dd58967d119baab745caec2f9d853297cec1989ec1d63f677d3880632b88/gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c", size = 215076, upload-time = "2025-07-24T03:45:54.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "imagesize" +version = "1.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/84/62473fb57d61e31fef6e36d64a179c8781605429fd927b5dd608c997be31/imagesize-1.4.1.tar.gz", hash = "sha256:69150444affb9cb0d5cc5a92b3676f0b2fb7cd9ae39e947a5e11a36b4497cd4a", size = 1280026, upload-time = "2022-07-01T12:21:05.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115, upload-time = "2025-03-05T20:05:02.478Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = 
"https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = 
"https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = 
"https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = 
"https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { 
url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + +[[package]] +name = "mdit-py-plugins" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, +] + 
+[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "myst-parser" +version = "4.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "jinja2" }, + { name = "markdown-it-py" }, + { name = "mdit-py-plugins" }, + { name = "pyyaml" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/66/a5/9626ba4f73555b3735ad86247a8077d4603aa8628537687c839ab08bfe44/myst_parser-4.0.1.tar.gz", hash = "sha256:5cfea715e4f3574138aecbf7d54132296bfd72bb614d31168f48c477a830a7c4", size = 93985, upload-time = "2025-02-12T10:53:03.833Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/df/76d0321c3797b54b60fef9ec3bd6f4cfd124b9e422182156a1dd418722cf/myst_parser-4.0.1-py3-none-any.whl", hash = "sha256:9134e88959ec3b5780aedf8a99680ea242869d012e8821db3126d427edc9c95d", size = 84579, upload-time = "2025-02-12T10:53:02.078Z" }, +] + +[[package]] +name = "nemo-rl-docs-env" +version = "0.0.1" +source = { virtual = "." 
} +dependencies = [ + { name = "gitpython" }, + { name = "myst-parser" }, + { name = "nvidia-sphinx-theme" }, + { name = "python-dotenv" }, + { name = "sphinx" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc2" }, + { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, + { name = "sphinxcontrib-mermaid" }, + { name = "swagger-plugin-for-sphinx" }, +] + +[package.metadata] +requires-dist = [ + { name = "gitpython", specifier = ">=3.1.45" }, + { name = "myst-parser" }, + { name = "nvidia-sphinx-theme" }, + { name = "python-dotenv" }, + { name = "sphinx" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc2" }, + { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, + { name = "sphinxcontrib-mermaid" }, + { name = "swagger-plugin-for-sphinx" }, +] + +[[package]] +name = "nvidia-sphinx-theme" +version = "0.0.9.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydata-sphinx-theme" }, + { name = "sphinx" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/8c/79/017fab2f7167a9a9795665f894d04f77aafceca80821b51589bb4b23ff5c/nvidia_sphinx_theme-0.0.9.post1-py3-none-any.whl", hash = "sha256:21ca60206dff2f380d7783d64bbaf71a5b9cacae53c7d0686f089c16b5a3d45a", size = 143816, upload-time = "2025-11-09T23:16:55.719Z" }, +] + +[[package]] +name = "packaging" +version = "25.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = 
"2025-04-19T11:48:57.875Z" }, +] + +[[package]] +name = "pydata-sphinx-theme" +version = "0.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "accessible-pygments" }, + { name = "babel" }, + { name = "beautifulsoup4" }, + { name = "docutils" }, + { name = "pygments" }, + { name = "sphinx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/20/bb50f9de3a6de69e6abd6b087b52fa2418a0418b19597601605f855ad044/pydata_sphinx_theme-0.16.1.tar.gz", hash = "sha256:a08b7f0b7f70387219dc659bff0893a7554d5eb39b59d3b8ef37b8401b7642d7", size = 2412693, upload-time = "2024-12-17T10:53:39.537Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = 
"sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/33/422b98d2195232ca1826284a76852ad5a86fe23e31b009c9886b2d0fb8b2/pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196", size = 182063, upload-time = "2025-09-25T21:32:11.445Z" }, + { url = "https://files.pythonhosted.org/packages/89/a0/6cf41a19a1f2f3feab0e9c0b74134aa2ce6849093d5517a0c550fe37a648/pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0", size = 173973, upload-time = "2025-09-25T21:32:12.492Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/7a778b6bd0b9a8039df8b1b1d80e2e2ad78aa04171592c8a5c43a56a6af4/pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28", size = 775116, upload-time = "2025-09-25T21:32:13.652Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/30/d7353c338e12baef4ecc1b09e877c1970bd3382789c159b4f89d6a70dc09/pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c", size = 844011, upload-time = "2025-09-25T21:32:15.21Z" }, + { url = "https://files.pythonhosted.org/packages/8b/9d/b3589d3877982d4f2329302ef98a8026e7f4443c765c46cfecc8858c6b4b/pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc", size = 807870, upload-time = "2025-09-25T21:32:16.431Z" }, + { url = "https://files.pythonhosted.org/packages/05/c0/b3be26a015601b822b97d9149ff8cb5ead58c66f981e04fedf4e762f4bd4/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e", size = 761089, upload-time = "2025-09-25T21:32:17.56Z" }, + { url = "https://files.pythonhosted.org/packages/be/8e/98435a21d1d4b46590d5459a22d88128103f8da4c2d4cb8f14f2a96504e1/pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea", size = 790181, upload-time = "2025-09-25T21:32:18.834Z" }, + { url = "https://files.pythonhosted.org/packages/74/93/7baea19427dcfbe1e5a372d81473250b379f04b1bd3c4c5ff825e2327202/pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5", size = 137658, upload-time = "2025-09-25T21:32:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/86/bf/899e81e4cce32febab4fb42bb97dcdf66bc135272882d1987881a4b519e9/pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b", size = 154003, upload-time = "2025-09-25T21:32:21.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/08/67bd04656199bbb51dbed1439b7f27601dfb576fb864099c7ef0c3e55531/pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd", size = 140344, upload-time = "2025-09-25T21:32:22.617Z" }, + { url = "https://files.pythonhosted.org/packages/d1/11/0fd08f8192109f7169db964b5707a2f1e8b745d4e239b784a5a1dd80d1db/pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8", size = 181669, upload-time = "2025-09-25T21:32:23.673Z" }, + { url = "https://files.pythonhosted.org/packages/b1/16/95309993f1d3748cd644e02e38b75d50cbc0d9561d21f390a76242ce073f/pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1", size = 173252, upload-time = "2025-09-25T21:32:25.149Z" }, + { url = "https://files.pythonhosted.org/packages/50/31/b20f376d3f810b9b2371e72ef5adb33879b25edb7a6d072cb7ca0c486398/pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c", size = 767081, upload-time = "2025-09-25T21:32:26.575Z" }, + { url = "https://files.pythonhosted.org/packages/49/1e/a55ca81e949270d5d4432fbbd19dfea5321eda7c41a849d443dc92fd1ff7/pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5", size = 841159, upload-time = "2025-09-25T21:32:27.727Z" }, + { url = "https://files.pythonhosted.org/packages/74/27/e5b8f34d02d9995b80abcef563ea1f8b56d20134d8f4e5e81733b1feceb2/pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6", size = 801626, upload-time = "2025-09-25T21:32:28.878Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/11/ba845c23988798f40e52ba45f34849aa8a1f2d4af4b798588010792ebad6/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6", size = 753613, upload-time = "2025-09-25T21:32:30.178Z" }, + { url = "https://files.pythonhosted.org/packages/3d/e0/7966e1a7bfc0a45bf0a7fb6b98ea03fc9b8d84fa7f2229e9659680b69ee3/pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be", size = 794115, upload-time = "2025-09-25T21:32:31.353Z" }, + { url = "https://files.pythonhosted.org/packages/de/94/980b50a6531b3019e45ddeada0626d45fa85cbe22300844a7983285bed3b/pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26", size = 137427, upload-time = "2025-09-25T21:32:32.58Z" }, + { url = "https://files.pythonhosted.org/packages/97/c9/39d5b874e8b28845e4ec2202b5da735d0199dbe5b8fb85f91398814a9a46/pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c", size = 154090, upload-time = "2025-09-25T21:32:33.659Z" }, + { url = "https://files.pythonhosted.org/packages/73/e8/2bdf3ca2090f68bb3d75b44da7bbc71843b19c9f2b9cb9b0f4ab7a5a4329/pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb", size = 140246, upload-time = "2025-09-25T21:32:34.663Z" }, + { url = "https://files.pythonhosted.org/packages/9d/8c/f4bd7f6465179953d3ac9bc44ac1a8a3e6122cf8ada906b4f96c60172d43/pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac", size = 181814, upload-time = "2025-09-25T21:32:35.712Z" }, + { url = "https://files.pythonhosted.org/packages/bd/9c/4d95bb87eb2063d20db7b60faa3840c1b18025517ae857371c4dd55a6b3a/pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310", size = 173809, upload-time = "2025-09-25T21:32:36.789Z" }, + { url = "https://files.pythonhosted.org/packages/92/b5/47e807c2623074914e29dabd16cbbdd4bf5e9b2db9f8090fa64411fc5382/pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7", size = 766454, upload-time = "2025-09-25T21:32:37.966Z" }, + { url = "https://files.pythonhosted.org/packages/02/9e/e5e9b168be58564121efb3de6859c452fccde0ab093d8438905899a3a483/pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788", size = 836355, upload-time = "2025-09-25T21:32:39.178Z" }, + { url = "https://files.pythonhosted.org/packages/88/f9/16491d7ed2a919954993e48aa941b200f38040928474c9e85ea9e64222c3/pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5", size = 794175, upload-time = "2025-09-25T21:32:40.865Z" }, + { url = "https://files.pythonhosted.org/packages/dd/3f/5989debef34dc6397317802b527dbbafb2b4760878a53d4166579111411e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764", size = 755228, upload-time = "2025-09-25T21:32:42.084Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ce/af88a49043cd2e265be63d083fc75b27b6ed062f5f9fd6cdc223ad62f03e/pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35", size = 789194, upload-time = "2025-09-25T21:32:43.362Z" }, + { url = "https://files.pythonhosted.org/packages/23/20/bb6982b26a40bb43951265ba29d4c246ef0ff59c9fdcdf0ed04e0687de4d/pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = 
"sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac", size = 156429, upload-time = "2025-09-25T21:32:57.844Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/a4541072bb9422c8a883ab55255f918fa378ecf083f5b85e87fc2b4eda1b/pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3", size = 143912, upload-time = "2025-09-25T21:32:59.247Z" }, + { url = "https://files.pythonhosted.org/packages/7c/f9/07dd09ae774e4616edf6cda684ee78f97777bdd15847253637a6f052a62f/pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3", size = 189108, upload-time = "2025-09-25T21:32:44.377Z" }, + { url = "https://files.pythonhosted.org/packages/4e/78/8d08c9fb7ce09ad8c38ad533c1191cf27f7ae1effe5bb9400a46d9437fcf/pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba", size = 183641, upload-time = "2025-09-25T21:32:45.407Z" }, + { url = "https://files.pythonhosted.org/packages/7b/5b/3babb19104a46945cf816d047db2788bcaf8c94527a805610b0289a01c6b/pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c", size = 831901, upload-time = "2025-09-25T21:32:48.83Z" }, + { url = "https://files.pythonhosted.org/packages/8b/cc/dff0684d8dc44da4d22a13f35f073d558c268780ce3c6ba1b87055bb0b87/pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702", size = 861132, upload-time = "2025-09-25T21:32:50.149Z" }, + { url = "https://files.pythonhosted.org/packages/b1/5e/f77dc6b9036943e285ba76b49e118d9ea929885becb0a29ba8a7c75e29fe/pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c", size = 839261, upload-time = "2025-09-25T21:32:51.808Z" }, + { url = "https://files.pythonhosted.org/packages/ce/88/a9db1376aa2a228197c58b37302f284b5617f56a5d959fd1763fb1675ce6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065", size = 805272, upload-time = "2025-09-25T21:32:52.941Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/1446574745d74df0c92e6aa4a7b0b3130706a4142b2d1a5869f2eaa423c6/pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65", size = 829923, upload-time = "2025-09-25T21:32:54.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/7a/1c7270340330e575b92f397352af856a8c06f230aa3e76f86b39d01b416a/pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9", size = 174062, upload-time = "2025-09-25T21:32:55.767Z" }, + { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, +] + +[[package]] +name = "requests" +version = "2.32.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, +] + +[[package]] +name = "roman-numerals-py" +version = "3.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/76/48fd56d17c5bdbdf65609abbc67288728a98ed4c02919428d4f52d23b24b/roman_numerals_py-3.1.0.tar.gz", hash = "sha256:be4bf804f083a4ce001b5eb7e3c0862479d10f94c936f6c4e5f250aa5ff5bd2d", size = 9017, upload-time = "2025-02-22T07:34:54.333Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/53/97/d2cbbaa10c9b826af0e10fdf836e1bf344d9f0abb873ebc34d1f49642d3f/roman_numerals_py-3.1.0-py3-none-any.whl", hash = "sha256:9da2ad2fb670bcf24e81070ceb3be72f6c11c440d73bd579fbeca1e9f330954c", size = 7742, upload-time = "2025-02-22T07:34:52.422Z" }, +] + +[[package]] +name = "smmap" +version = "5.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/44/cd/a040c4b3119bbe532e5b0732286f805445375489fceaec1f48306068ee3b/smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5", size = 22329, upload-time = "2025-01-02T07:14:40.909Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = 
"sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "snowballstemmer" +version = "3.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/75/a7/9810d872919697c9d01295633f5d574fb416d47e535f258272ca1f01f447/snowballstemmer-3.0.1.tar.gz", hash = "sha256:6d5eeeec8e9f84d4d56b847692bacf79bc2c8e90c7f80ca4444ff8b6f2e52895", size = 105575, upload-time = "2025-05-09T16:34:51.843Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, +] + +[[package]] +name = "soupsieve" +version = "2.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/e6/21ccce3262dd4889aa3332e5a119a3491a95e8f60939870a3a035aabac0d/soupsieve-2.8.tar.gz", hash = "sha256:e2dd4a40a628cb5f28f6d4b0db8800b8f581b65bb380b97de22ba5ca8d72572f", size = 103472, upload-time = "2025-08-27T15:39:51.78Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/a0/bb38d3b76b8cae341dad93a2dd83ab7462e6dbcdd84d43f54ee60a8dc167/soupsieve-2.8-py3-none-any.whl", hash = "sha256:0cc76456a30e20f5d7f2e14a98a4ae2ee4e5abdc7c5ea0aafe795f344bc7984c", size = 36679, upload-time = "2025-08-27T15:39:50.179Z" }, +] + +[[package]] +name = "sphinx" +version = "8.2.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "alabaster" 
}, + { name = "babel" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "docutils" }, + { name = "imagesize" }, + { name = "jinja2" }, + { name = "packaging" }, + { name = "pygments" }, + { name = "requests" }, + { name = "roman-numerals-py" }, + { name = "snowballstemmer" }, + { name = "sphinxcontrib-applehelp" }, + { name = "sphinxcontrib-devhelp" }, + { name = "sphinxcontrib-htmlhelp" }, + { name = "sphinxcontrib-jsmath" }, + { name = "sphinxcontrib-qthelp" }, + { name = "sphinxcontrib-serializinghtml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/ad/4360e50ed56cb483667b8e6dadf2d3fda62359593faabbe749a27c4eaca6/sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348", size = 8321876, upload-time = "2025-03-02T22:31:59.658Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3", size = 3589741, upload-time = "2025-03-02T22:31:56.836Z" }, +] + +[[package]] +name = "sphinx-autobuild" +version = "2025.8.25" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama" }, + { name = "sphinx" }, + { name = "starlette" }, + { name = "uvicorn" }, + { name = "watchfiles" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e0/3c/a59a3a453d4133777f7ed2e83c80b7dc817d43c74b74298ca0af869662ad/sphinx_autobuild-2025.8.25.tar.gz", hash = "sha256:9cf5aab32853c8c31af572e4fecdc09c997e2b8be5a07daf2a389e270e85b213", size = 15200, upload-time = "2025-08-25T18:44:55.436Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d7/20/56411b52f917696995f5ad27d2ea7e9492c84a043c5b49a3a3173573cd93/sphinx_autobuild-2025.8.25-py3-none-any.whl", hash = "sha256:b750ac7d5a18603e4665294323fd20f6dcc0a984117026d1986704fa68f0379a", size = 12535, 
upload-time = "2025-08-25T18:44:54.164Z" }, +] + +[[package]] +name = "sphinx-autodoc2" +version = "0.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "astroid" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/5f/5350046d1aa1a56b063ae08b9ad871025335c9d55fe2372896ea48711da9/sphinx_autodoc2-0.5.0.tar.gz", hash = "sha256:7d76044aa81d6af74447080182b6868c7eb066874edc835e8ddf810735b6565a", size = 115077, upload-time = "2023-11-27T07:27:51.407Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/e6/48d47961bbdae755ba9c17dfc65d89356312c67668dcb36c87cfadfa1964/sphinx_autodoc2-0.5.0-py3-none-any.whl", hash = "sha256:e867013b1512f9d6d7e6f6799f8b537d6884462acd118ef361f3f619a60b5c9e", size = 43385, upload-time = "2023-11-27T07:27:49.929Z" }, +] + +[[package]] +name = "sphinx-copybutton" +version = "0.5.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/2b/a964715e7f5295f77509e59309959f4125122d648f86b4fe7d70ca1d882c/sphinx-copybutton-0.5.2.tar.gz", hash = "sha256:4cf17c82fb9646d1bc9ca92ac280813a3b605d8c421225fd9913154103ee1fbd", size = 23039, upload-time = "2023-04-14T08:10:22.998Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/48/1ea60e74949eecb12cdd6ac43987f9fd331156388dcc2319b45e2ebb81bf/sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e", size = 13343, upload-time = "2023-04-14T08:10:20.844Z" }, +] + +[[package]] +name = "sphinx-design" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/69/b34e0cb5336f09c6866d53b4a19d76c227cdec1bbc7ac4de63ca7d58c9c7/sphinx_design-0.6.1.tar.gz", hash = 
"sha256:b44eea3719386d04d765c1a8257caca2b3e6f8421d7b3a5e742c0fd45f84e632", size = 2193689, upload-time = "2024-08-02T13:48:44.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338, upload-time = "2024-08-02T13:48:42.106Z" }, +] + +[[package]] +name = "sphinxcontrib-applehelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, +] + +[[package]] +name = "sphinxcontrib-devhelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, +] + +[[package]] +name = "sphinxcontrib-htmlhelp" +version = "2.1.0" 
+source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, +] + +[[package]] +name = "sphinxcontrib-jsmath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, +] + +[[package]] +name = "sphinxcontrib-mermaid" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/69/bf039237ad260073e8c02f820b3e00dc34f3a2de20aff7861e6b19d2f8c5/sphinxcontrib_mermaid-1.0.0.tar.gz", hash = "sha256:2e8ab67d3e1e2816663f9347d026a8dee4a858acdd4ad32dd1c808893db88146", size = 15153, upload-time = "2024-10-12T16:33:03.863Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/cd/c8/784b9ac6ea08aa594c1a4becbd0dbe77186785362e31fd633b8c6ae0197a/sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3", size = 9597, upload-time = "2024-10-12T16:33:02.303Z" }, +] + +[[package]] +name = "sphinxcontrib-qthelp" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, +] + +[[package]] +name = "sphinxcontrib-serializinghtml" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, +] + +[[package]] +name = "starlette" +version = "0.50.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "typing-extensions", marker = 
"python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ba/b8/73a0e6a6e079a9d9cfa64113d771e421640b6f679a52eeb9b32f72d871a1/starlette-0.50.0.tar.gz", hash = "sha256:a2a17b22203254bcbc2e1f926d2d55f3f9497f769416b3190768befe598fa3ca", size = 2646985, upload-time = "2025-11-01T15:25:27.516Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, +] + +[[package]] +name = "swagger-plugin-for-sphinx" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "jinja2" }, + { name = "sphinx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/b3/84bc7d8af6b50e6c365f1593fe4a01245f4bfb2178040261c6429db0b46b/swagger_plugin_for_sphinx-6.0.0.tar.gz", hash = "sha256:70366c610648cede5ef482922c9c97c86c99746b9edf33e4ec13fab23d820251", size = 16026, upload-time = "2025-10-16T06:26:09.95Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/f0/1098f6628bbe04b086ce59692d09b116ec751286eb7d33e88c5bf0c2e210/swagger_plugin_for_sphinx-6.0.0-py3-none-any.whl", hash = "sha256:35dc646d759a44ce78aefde2fe34f54e7b8c3439d0a52541a6a8b9924a711832", size = 11253, upload-time = "2025-10-16T06:26:08.504Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "urllib3" +version = "2.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185, upload-time = "2025-06-18T14:07:41.644Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795, upload-time = "2025-06-18T14:07:40.39Z" }, +] + +[[package]] +name = "uvicorn" +version = "0.38.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/f06b84e2697fef4688ca63bdb2fdf113ca0a3be33f94488f2cadb690b0cf/uvicorn-0.38.0.tar.gz", hash = "sha256:fd97093bdd120a2609fc0d3afe931d4d4ad688b6e75f0f929fde1bc36fe0e91d", size = 80605, upload-time = "2025-10-18T13:46:44.63Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/d9/d88e73ca598f4f6ff671fb5fde8a32925c2e08a637303a1d12883c7305fa/uvicorn-0.38.0-py3-none-any.whl", hash = "sha256:48c0afd214ceb59340075b4a052ea1ee91c16fbc2a9b1469cca0e54566977b02", size = 68109, upload-time = "2025-10-18T13:46:42.958Z" }, +] + +[[package]] +name = "watchfiles" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/d5/f039e7e3c639d9b1d09b07ea412a6806d38123f0508e5f9b48a87b0a76cc/watchfiles-1.1.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8c89f9f2f740a6b7dcc753140dd5e1ab9215966f7a3530d0c0705c83b401bd7d", size = 404745, upload-time = "2025-10-14T15:04:46.731Z" }, + { url = "https://files.pythonhosted.org/packages/a5/96/a881a13aa1349827490dab2d363c8039527060cfcc2c92cc6d13d1b1049e/watchfiles-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd404be08018c37350f0d6e34676bd1e2889990117a2b90070b3007f172d0610", size = 391769, upload-time = "2025-10-14T15:04:48.003Z" }, + { url = "https://files.pythonhosted.org/packages/4b/5b/d3b460364aeb8da471c1989238ea0e56bec24b6042a68046adf3d9ddb01c/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8526e8f916bb5b9a0a777c8317c23ce65de259422bba5b31325a6fa6029d33af", size = 449374, upload-time = "2025-10-14T15:04:49.179Z" }, + { url = "https://files.pythonhosted.org/packages/b9/44/5769cb62d4ed055cb17417c0a109a92f007114a4e07f30812a73a4efdb11/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2edc3553362b1c38d9f06242416a5d8e9fe235c204a4072e988ce2e5bb1f69f6", size = 459485, upload-time = "2025-10-14T15:04:50.155Z" }, + { url = "https://files.pythonhosted.org/packages/19/0c/286b6301ded2eccd4ffd0041a1b726afda999926cf720aab63adb68a1e36/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30f7da3fb3f2844259cba4720c3fc7138eb0f7b659c38f3bfa65084c7fc7abce", size = 488813, upload-time = "2025-10-14T15:04:51.059Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/2b/8530ed41112dd4a22f4dcfdb5ccf6a1baad1ff6eed8dc5a5f09e7e8c41c7/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8979280bdafff686ba5e4d8f97840f929a87ed9cdf133cbbd42f7766774d2aa", size = 594816, upload-time = "2025-10-14T15:04:52.031Z" }, + { url = "https://files.pythonhosted.org/packages/ce/d2/f5f9fb49489f184f18470d4f99f4e862a4b3e9ac2865688eb2099e3d837a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dcc5c24523771db3a294c77d94771abcfcb82a0e0ee8efd910c37c59ec1b31bb", size = 475186, upload-time = "2025-10-14T15:04:53.064Z" }, + { url = "https://files.pythonhosted.org/packages/cf/68/5707da262a119fb06fbe214d82dd1fe4a6f4af32d2d14de368d0349eb52a/watchfiles-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db5d7ae38ff20153d542460752ff397fcf5c96090c1230803713cf3147a6803", size = 456812, upload-time = "2025-10-14T15:04:55.174Z" }, + { url = "https://files.pythonhosted.org/packages/66/ab/3cbb8756323e8f9b6f9acb9ef4ec26d42b2109bce830cc1f3468df20511d/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:28475ddbde92df1874b6c5c8aaeb24ad5be47a11f87cde5a28ef3835932e3e94", size = 630196, upload-time = "2025-10-14T15:04:56.22Z" }, + { url = "https://files.pythonhosted.org/packages/78/46/7152ec29b8335f80167928944a94955015a345440f524d2dfe63fc2f437b/watchfiles-1.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:36193ed342f5b9842edd3532729a2ad55c4160ffcfa3700e0d54be496b70dd43", size = 622657, upload-time = "2025-10-14T15:04:57.521Z" }, + { url = "https://files.pythonhosted.org/packages/0a/bf/95895e78dd75efe9a7f31733607f384b42eb5feb54bd2eb6ed57cc2e94f4/watchfiles-1.1.1-cp312-cp312-win32.whl", hash = "sha256:859e43a1951717cc8de7f4c77674a6d389b106361585951d9e69572823f311d9", size = 272042, upload-time = "2025-10-14T15:04:59.046Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/0a/90eb755f568de2688cb220171c4191df932232c20946966c27a59c400850/watchfiles-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:91d4c9a823a8c987cce8fa2690923b069966dabb196dd8d137ea2cede885fde9", size = 288410, upload-time = "2025-10-14T15:05:00.081Z" }, + { url = "https://files.pythonhosted.org/packages/36/76/f322701530586922fbd6723c4f91ace21364924822a8772c549483abed13/watchfiles-1.1.1-cp312-cp312-win_arm64.whl", hash = "sha256:a625815d4a2bdca61953dbba5a39d60164451ef34c88d751f6c368c3ea73d404", size = 278209, upload-time = "2025-10-14T15:05:01.168Z" }, + { url = "https://files.pythonhosted.org/packages/bb/f4/f750b29225fe77139f7ae5de89d4949f5a99f934c65a1f1c0b248f26f747/watchfiles-1.1.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:130e4876309e8686a5e37dba7d5e9bc77e6ed908266996ca26572437a5271e18", size = 404321, upload-time = "2025-10-14T15:05:02.063Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f9/f07a295cde762644aa4c4bb0f88921d2d141af45e735b965fb2e87858328/watchfiles-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5f3bde70f157f84ece3765b42b4a52c6ac1a50334903c6eaf765362f6ccca88a", size = 391783, upload-time = "2025-10-14T15:05:03.052Z" }, + { url = "https://files.pythonhosted.org/packages/bc/11/fc2502457e0bea39a5c958d86d2cb69e407a4d00b85735ca724bfa6e0d1a/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14e0b1fe858430fc0251737ef3824c54027bedb8c37c38114488b8e131cf8219", size = 449279, upload-time = "2025-10-14T15:05:04.004Z" }, + { url = "https://files.pythonhosted.org/packages/e3/1f/d66bc15ea0b728df3ed96a539c777acfcad0eb78555ad9efcaa1274688f0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f27db948078f3823a6bb3b465180db8ebecf26dd5dae6f6180bd87383b6b4428", size = 459405, upload-time = "2025-10-14T15:05:04.942Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/90/9f4a65c0aec3ccf032703e6db02d89a157462fbb2cf20dd415128251cac0/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:059098c3a429f62fc98e8ec62b982230ef2c8df68c79e826e37b895bc359a9c0", size = 488976, upload-time = "2025-10-14T15:05:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/37/57/ee347af605d867f712be7029bb94c8c071732a4b44792e3176fa3c612d39/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfb5862016acc9b869bb57284e6cb35fdf8e22fe59f7548858e2f971d045f150", size = 595506, upload-time = "2025-10-14T15:05:06.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/78/cc5ab0b86c122047f75e8fc471c67a04dee395daf847d3e59381996c8707/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:319b27255aacd9923b8a276bb14d21a5f7ff82564c744235fc5eae58d95422ae", size = 474936, upload-time = "2025-10-14T15:05:07.906Z" }, + { url = "https://files.pythonhosted.org/packages/62/da/def65b170a3815af7bd40a3e7010bf6ab53089ef1b75d05dd5385b87cf08/watchfiles-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c755367e51db90e75b19454b680903631d41f9e3607fbd941d296a020c2d752d", size = 456147, upload-time = "2025-10-14T15:05:09.138Z" }, + { url = "https://files.pythonhosted.org/packages/57/99/da6573ba71166e82d288d4df0839128004c67d2778d3b566c138695f5c0b/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c22c776292a23bfc7237a98f791b9ad3144b02116ff10d820829ce62dff46d0b", size = 630007, upload-time = "2025-10-14T15:05:10.117Z" }, + { url = "https://files.pythonhosted.org/packages/a8/51/7439c4dd39511368849eb1e53279cd3454b4a4dbace80bab88feeb83c6b5/watchfiles-1.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3a476189be23c3686bc2f4321dd501cb329c0a0469e77b7b534ee10129ae6374", size = 622280, upload-time = "2025-10-14T15:05:11.146Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/9c/8ed97d4bba5db6fdcdb2b298d3898f2dd5c20f6b73aee04eabe56c59677e/watchfiles-1.1.1-cp313-cp313-win32.whl", hash = "sha256:bf0a91bfb5574a2f7fc223cf95eeea79abfefa404bf1ea5e339c0c1560ae99a0", size = 272056, upload-time = "2025-10-14T15:05:12.156Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f3/c14e28429f744a260d8ceae18bf58c1d5fa56b50d006a7a9f80e1882cb0d/watchfiles-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:52e06553899e11e8074503c8e716d574adeeb7e68913115c4b3653c53f9bae42", size = 288162, upload-time = "2025-10-14T15:05:13.208Z" }, + { url = "https://files.pythonhosted.org/packages/dc/61/fe0e56c40d5cd29523e398d31153218718c5786b5e636d9ae8ae79453d27/watchfiles-1.1.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac3cc5759570cd02662b15fbcd9d917f7ecd47efe0d6b40474eafd246f91ea18", size = 277909, upload-time = "2025-10-14T15:05:14.49Z" }, + { url = "https://files.pythonhosted.org/packages/79/42/e0a7d749626f1e28c7108a99fb9bf524b501bbbeb9b261ceecde644d5a07/watchfiles-1.1.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:563b116874a9a7ce6f96f87cd0b94f7faf92d08d0021e837796f0a14318ef8da", size = 403389, upload-time = "2025-10-14T15:05:15.777Z" }, + { url = "https://files.pythonhosted.org/packages/15/49/08732f90ce0fbbc13913f9f215c689cfc9ced345fb1bcd8829a50007cc8d/watchfiles-1.1.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3ad9fe1dae4ab4212d8c91e80b832425e24f421703b5a42ef2e4a1e215aff051", size = 389964, upload-time = "2025-10-14T15:05:16.85Z" }, + { url = "https://files.pythonhosted.org/packages/27/0d/7c315d4bd5f2538910491a0393c56bf70d333d51bc5b34bee8e68e8cea19/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce70f96a46b894b36eba678f153f052967a0d06d5b5a19b336ab0dbbd029f73e", size = 448114, upload-time = "2025-10-14T15:05:17.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/c3/24/9e096de47a4d11bc4df41e9d1e61776393eac4cb6eb11b3e23315b78b2cc/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cb467c999c2eff23a6417e58d75e5828716f42ed8289fe6b77a7e5a91036ca70", size = 460264, upload-time = "2025-10-14T15:05:18.962Z" }, + { url = "https://files.pythonhosted.org/packages/cc/0f/e8dea6375f1d3ba5fcb0b3583e2b493e77379834c74fd5a22d66d85d6540/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:836398932192dae4146c8f6f737d74baeac8b70ce14831a239bdb1ca882fc261", size = 487877, upload-time = "2025-10-14T15:05:20.094Z" }, + { url = "https://files.pythonhosted.org/packages/ac/5b/df24cfc6424a12deb41503b64d42fbea6b8cb357ec62ca84a5a3476f654a/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:743185e7372b7bc7c389e1badcc606931a827112fbbd37f14c537320fca08620", size = 595176, upload-time = "2025-10-14T15:05:21.134Z" }, + { url = "https://files.pythonhosted.org/packages/8f/b5/853b6757f7347de4e9b37e8cc3289283fb983cba1ab4d2d7144694871d9c/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afaeff7696e0ad9f02cbb8f56365ff4686ab205fcf9c4c5b6fdfaaa16549dd04", size = 473577, upload-time = "2025-10-14T15:05:22.306Z" }, + { url = "https://files.pythonhosted.org/packages/e1/f7/0a4467be0a56e80447c8529c9fce5b38eab4f513cb3d9bf82e7392a5696b/watchfiles-1.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f7eb7da0eb23aa2ba036d4f616d46906013a68caf61b7fdbe42fc8b25132e77", size = 455425, upload-time = "2025-10-14T15:05:23.348Z" }, + { url = "https://files.pythonhosted.org/packages/8e/e0/82583485ea00137ddf69bc84a2db88bd92ab4a6e3c405e5fb878ead8d0e7/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:831a62658609f0e5c64178211c942ace999517f5770fe9436be4c2faeba0c0ef", size = 628826, upload-time = "2025-10-14T15:05:24.398Z" }, + { 
url = "https://files.pythonhosted.org/packages/28/9a/a785356fccf9fae84c0cc90570f11702ae9571036fb25932f1242c82191c/watchfiles-1.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:f9a2ae5c91cecc9edd47e041a930490c31c3afb1f5e6d71de3dc671bfaca02bf", size = 622208, upload-time = "2025-10-14T15:05:25.45Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f4/0872229324ef69b2c3edec35e84bd57a1289e7d3fe74588048ed8947a323/watchfiles-1.1.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:d1715143123baeeaeadec0528bb7441103979a1d5f6fd0e1f915383fea7ea6d5", size = 404315, upload-time = "2025-10-14T15:05:26.501Z" }, + { url = "https://files.pythonhosted.org/packages/7b/22/16d5331eaed1cb107b873f6ae1b69e9ced582fcf0c59a50cd84f403b1c32/watchfiles-1.1.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:39574d6370c4579d7f5d0ad940ce5b20db0e4117444e39b6d8f99db5676c52fd", size = 390869, upload-time = "2025-10-14T15:05:27.649Z" }, + { url = "https://files.pythonhosted.org/packages/b2/7e/5643bfff5acb6539b18483128fdc0ef2cccc94a5b8fbda130c823e8ed636/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7365b92c2e69ee952902e8f70f3ba6360d0d596d9299d55d7d386df84b6941fb", size = 449919, upload-time = "2025-10-14T15:05:28.701Z" }, + { url = "https://files.pythonhosted.org/packages/51/2e/c410993ba5025a9f9357c376f48976ef0e1b1aefb73b97a5ae01a5972755/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bfff9740c69c0e4ed32416f013f3c45e2ae42ccedd1167ef2d805c000b6c71a5", size = 460845, upload-time = "2025-10-14T15:05:30.064Z" }, + { url = "https://files.pythonhosted.org/packages/8e/a4/2df3b404469122e8680f0fcd06079317e48db58a2da2950fb45020947734/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b27cf2eb1dda37b2089e3907d8ea92922b673c0c427886d4edc6b94d8dfe5db3", size = 489027, upload-time = "2025-10-14T15:05:31.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/84/4587ba5b1f267167ee715b7f66e6382cca6938e0a4b870adad93e44747e6/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:526e86aced14a65a5b0ec50827c745597c782ff46b571dbfe46192ab9e0b3c33", size = 595615, upload-time = "2025-10-14T15:05:32.074Z" }, + { url = "https://files.pythonhosted.org/packages/6a/0f/c6988c91d06e93cd0bb3d4a808bcf32375ca1904609835c3031799e3ecae/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04e78dd0b6352db95507fd8cb46f39d185cf8c74e4cf1e4fbad1d3df96faf510", size = 474836, upload-time = "2025-10-14T15:05:33.209Z" }, + { url = "https://files.pythonhosted.org/packages/b4/36/ded8aebea91919485b7bbabbd14f5f359326cb5ec218cd67074d1e426d74/watchfiles-1.1.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c85794a4cfa094714fb9c08d4a218375b2b95b8ed1666e8677c349906246c05", size = 455099, upload-time = "2025-10-14T15:05:34.189Z" }, + { url = "https://files.pythonhosted.org/packages/98/e0/8c9bdba88af756a2fce230dd365fab2baf927ba42cd47521ee7498fd5211/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:74d5012b7630714b66be7b7b7a78855ef7ad58e8650c73afc4c076a1f480a8d6", size = 630626, upload-time = "2025-10-14T15:05:35.216Z" }, + { url = "https://files.pythonhosted.org/packages/2a/84/a95db05354bf2d19e438520d92a8ca475e578c647f78f53197f5a2f17aaf/watchfiles-1.1.1-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:8fbe85cb3201c7d380d3d0b90e63d520f15d6afe217165d7f98c9c649654db81", size = 622519, upload-time = "2025-10-14T15:05:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1d/ce/d8acdc8de545de995c339be67711e474c77d643555a9bb74a9334252bd55/watchfiles-1.1.1-cp314-cp314-win32.whl", hash = "sha256:3fa0b59c92278b5a7800d3ee7733da9d096d4aabcfabb9a928918bd276ef9b9b", size = 272078, upload-time = "2025-10-14T15:05:37.63Z" }, + { url = 
"https://files.pythonhosted.org/packages/c4/c9/a74487f72d0451524be827e8edec251da0cc1fcf111646a511ae752e1a3d/watchfiles-1.1.1-cp314-cp314-win_amd64.whl", hash = "sha256:c2047d0b6cea13b3316bdbafbfa0c4228ae593d995030fda39089d36e64fc03a", size = 287664, upload-time = "2025-10-14T15:05:38.95Z" }, + { url = "https://files.pythonhosted.org/packages/df/b8/8ac000702cdd496cdce998c6f4ee0ca1f15977bba51bdf07d872ebdfc34c/watchfiles-1.1.1-cp314-cp314-win_arm64.whl", hash = "sha256:842178b126593addc05acf6fce960d28bc5fae7afbaa2c6c1b3a7b9460e5be02", size = 277154, upload-time = "2025-10-14T15:05:39.954Z" }, + { url = "https://files.pythonhosted.org/packages/47/a8/e3af2184707c29f0f14b1963c0aace6529f9d1b8582d5b99f31bbf42f59e/watchfiles-1.1.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:88863fbbc1a7312972f1c511f202eb30866370ebb8493aef2812b9ff28156a21", size = 403820, upload-time = "2025-10-14T15:05:40.932Z" }, + { url = "https://files.pythonhosted.org/packages/c0/ec/e47e307c2f4bd75f9f9e8afbe3876679b18e1bcec449beca132a1c5ffb2d/watchfiles-1.1.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:55c7475190662e202c08c6c0f4d9e345a29367438cf8e8037f3155e10a88d5a5", size = 390510, upload-time = "2025-10-14T15:05:41.945Z" }, + { url = "https://files.pythonhosted.org/packages/d5/a0/ad235642118090f66e7b2f18fd5c42082418404a79205cdfca50b6309c13/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f53fa183d53a1d7a8852277c92b967ae99c2d4dcee2bfacff8868e6e30b15f7", size = 448408, upload-time = "2025-10-14T15:05:43.385Z" }, + { url = "https://files.pythonhosted.org/packages/df/85/97fa10fd5ff3332ae17e7e40e20784e419e28521549780869f1413742e9d/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6aae418a8b323732fa89721d86f39ec8f092fc2af67f4217a2b07fd3e93c6101", size = 458968, upload-time = "2025-10-14T15:05:44.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/c2/9059c2e8966ea5ce678166617a7f75ecba6164375f3b288e50a40dc6d489/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f096076119da54a6080e8920cbdaac3dbee667eb91dcc5e5b78840b87415bd44", size = 488096, upload-time = "2025-10-14T15:05:45.398Z" }, + { url = "https://files.pythonhosted.org/packages/94/44/d90a9ec8ac309bc26db808a13e7bfc0e4e78b6fc051078a554e132e80160/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00485f441d183717038ed2e887a7c868154f216877653121068107b227a2f64c", size = 596040, upload-time = "2025-10-14T15:05:46.502Z" }, + { url = "https://files.pythonhosted.org/packages/95/68/4e3479b20ca305cfc561db3ed207a8a1c745ee32bf24f2026a129d0ddb6e/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a55f3e9e493158d7bfdb60a1165035f1cf7d320914e7b7ea83fe22c6023b58fc", size = 473847, upload-time = "2025-10-14T15:05:47.484Z" }, + { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, + { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, +] + +[[package]] +name = 
"websockets" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437, upload-time = "2025-03-05T20:02:16.706Z" }, + { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096, upload-time = "2025-03-05T20:02:18.832Z" }, + { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332, upload-time = "2025-03-05T20:02:20.187Z" }, + { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152, upload-time = "2025-03-05T20:02:22.286Z" }, + { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096, 
upload-time = "2025-03-05T20:02:24.368Z" }, + { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523, upload-time = "2025-03-05T20:02:25.669Z" }, + { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790, upload-time = "2025-03-05T20:02:26.99Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165, upload-time = "2025-03-05T20:02:30.291Z" }, + { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160, upload-time = "2025-03-05T20:02:31.634Z" }, + { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395, upload-time = "2025-03-05T20:02:33.017Z" }, + { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841, upload-time = "2025-03-05T20:02:34.498Z" }, + { url = 
"https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440, upload-time = "2025-03-05T20:02:36.695Z" }, + { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098, upload-time = "2025-03-05T20:02:37.985Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329, upload-time = "2025-03-05T20:02:39.298Z" }, + { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111, upload-time = "2025-03-05T20:02:40.595Z" }, + { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054, upload-time = "2025-03-05T20:02:41.926Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496, upload-time = 
"2025-03-05T20:02:43.304Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829, upload-time = "2025-03-05T20:02:48.812Z" }, + { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217, upload-time = "2025-03-05T20:02:50.14Z" }, + { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, + { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, + { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743, upload-time = "2025-03-05T20:03:39.41Z" }, +] diff --git a/examples/configs/distillation_math.yaml b/examples/configs/distillation_math.yaml new file mode 100644 
index 0000000000..b77c6d3893 --- /dev/null +++ b/examples/configs/distillation_math.yaml @@ -0,0 +1,238 @@ +# Distillation Algorithm Configuration +distillation: + num_prompts_per_step: 128 + num_generations_per_prompt: 1 + max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question) + max_num_steps: 1000 + max_num_epochs: 10 + val_batch_size: 64 + val_period: 20 + val_at_start: false + max_val_samples: 512 + topk_logits_k: 64 + seed: 42 + +loss_fn: + kl_type: "mixed" # forward, reverse, mixed + mixed_kl_weight: 0.5 # when kl_type is "mixed", this is the weight of the forward KL + zero_outside_topk: false # zero out the teacher logits outside the top k when calculate forward KL loss + +checkpointing: + enabled: true + checkpoint_dir: "checkpoints/distillation-${policy.model_name}" + metric_name: "val:accuracy" # one of "val:" or "train:" followed by the metric name + higher_is_better: true + keep_top_k: 3 + save_period: 10 + checkpoint_must_save_by: null + model_save_format: "safetensors" + save_consolidated: false + +policy: &POLICY_BASE + model_name: "Qwen/Qwen3-1.7B-Base" + tokenizer: + name: ${..model_name} ## specify if you'd like to use a tokenizer different from the model's default + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true + train_global_batch_size: 64 + train_micro_batch_size: 1 + generation_batch_size: 64 + logprob_batch_size: 1 + max_total_sequence_length: 8192 + precision: "bfloat16" + logprob_chunk_size: null + + offload_optimizer_for_logprob: false + + dtensor_cfg: &DTENSOR_BASE + enabled: true + _v2: true + cpu_offload: False + sequence_parallel: false + activation_checkpointing: true + tensor_parallel_size: 2 + context_parallel_size: 2 + custom_parallel_plan: null + + dynamic_batching: + enabled: true + train_mb_tokens: ${mul:${..max_total_sequence_length}, ${..train_micro_batch_size}} + logprob_mb_tokens: 
${mul:${..max_total_sequence_length}, ${..logprob_batch_size}} + sequence_length_round: 64 + + sequence_packing: + enabled: false + train_mb_tokens: ${mul:${..max_total_sequence_length}, ${..train_micro_batch_size}} + logprob_mb_tokens: ${mul:${..max_total_sequence_length}, ${..logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + max_grad_norm: 1.0 + # makes the training sequence length divisible by the tensor parallel size + # this is useful for sequence parallel training + # must be divisible by 2*cp + make_sequence_length_divisible_by: ${mul:${mul:${.dtensor_cfg.tensor_parallel_size}, ${.dtensor_cfg.context_parallel_size}}, 2} + optimizer: + name: "torch.optim.AdamW" + kwargs: + lr: 2.0e-5 + weight_decay: 0.01 + betas: [0.9, 0.999] + eps: 1e-8 + # when using Dtensor, we need to set foreach + # and fused to False + foreach: False + fused: False + + megatron_cfg: &MEGATRON_BASE + enabled: false + empty_unused_memory_level: 0 + activation_checkpointing: false + converter_type: "Qwen3ForCausalLM" + tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 2 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 2 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + bias_activation_fusion: True + defer_fp32_logits: False + + optimizer: + optimizer: "adam" + lr: 2.00001e-5 + min_lr: 2.0e-5 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 
0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + clip_grad: ${policy.max_grad_norm} + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: 1000 + lr_warmup_iters: 10 + lr_warmup_init: 2.0e-6 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + scheduler: + - name: "torch.optim.lr_scheduler.LinearLR" + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 10 + - name: "torch.optim.lr_scheduler.ConstantLR" + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: [10] + + generation: + backend: "vllm" + max_new_tokens: ${..max_total_sequence_length} # refer to local policy/teacher config + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + async_engine: false + precision: ${...precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + expert_parallel_size: 1 # When EP > 1, EP must be a multiple of TP since vLLM's EP = DP * TP + gpu_memory_utilization: 0.6 + max_model_len: ${...max_total_sequence_length} # refer to local policy/teacher config + enforce_eager: False + use_deep_gemm: False + num_last_layers_in_bf16: 0 + num_first_layers_in_bf16: 0 + distributed_executor_backend: null + + colocated: + # true: generation shares training GPUs + # false: uses dedicated generation resources + enabled: true + # only relevant when enabled is false + resources: + gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 + num_nodes: null # Decides 
number of nodes to be dedicated to generation + + +teacher: + <<: *POLICY_BASE + model_name: "Qwen/Qwen3-4B" + dtensor_cfg: + <<: *DTENSOR_BASE + context_parallel_size: 2 + tensor_parallel_size: 4 + +data: + max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len + prompt_file: "examples/prompts/cot.txt" + system_prompt_file: null + dataset_name: "DeepScaler" + shuffle: true + +env: + math: + num_workers: 8 + +logger: + log_dir: "logs/distillation" + num_val_samples_to_print: 5 + wandb_enabled: true + tensorboard_enabled: true + mlflow_enabled: false + swanlab_enabled: false + monitor_gpus: true + wandb: + project: "nemo-distillation" + name: "distillation-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}" + tensorboard: + log_dir: "tb_logs-distillation-${data.dataset_name}" + mlflow: + experiment_name: "distillation-dev" + run_name: "distillation-math-cl-logger" + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 + +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/distillation_math_megatron.yaml b/examples/configs/distillation_math_megatron.yaml new file mode 100644 index 0000000000..8f7860b1a1 --- /dev/null +++ b/examples/configs/distillation_math_megatron.yaml @@ -0,0 +1,157 @@ +defaults: distillation_math.yaml + +checkpointing: + checkpoint_dir: "checkpoints/distillation-megatron-${policy.model_name}" + +policy: &POLICY_BASE + model_name: "Qwen/Qwen3-1.7B-Base" + tokenizer: + name: ${..model_name} ## specify if you'd like to use a tokenizer different from the model's default + train_global_batch_size: 64 + train_micro_batch_size: 1 + generation_batch_size: 64 + logprob_batch_size: 1 + max_total_sequence_length: 8192 + precision: "bfloat16" + logprob_chunk_size: null + + dtensor_cfg: + enabled: false + + dynamic_batching: + enabled: false + train_mb_tokens: ${mul:${..max_total_sequence_length}, 
${..train_micro_batch_size}} + logprob_mb_tokens: ${mul:${..max_total_sequence_length}, ${..logprob_batch_size}} + sequence_length_round: 64 + + sequence_packing: + enabled: true + train_mb_tokens: ${mul:${..max_total_sequence_length}, ${..train_micro_batch_size}} + logprob_mb_tokens: ${mul:${..max_total_sequence_length}, ${..logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + max_grad_norm: 1.0 + + make_sequence_length_divisible_by: ${mul:${mul:${.megatron_cfg.tensor_model_parallel_size}, ${.megatron_cfg.context_parallel_size}}, 2} + + megatron_cfg: &MEGATRON_BASE + enabled: true + empty_unused_memory_level: 0 + activation_checkpointing: false + converter_type: "Qwen3ForCausalLM" + tensor_model_parallel_size: 2 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 2 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 2 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + bias_activation_fusion: True + defer_fp32_logits: False + + optimizer: + optimizer: "adam" + lr: 2.00001e-5 + min_lr: 2.0e-5 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + clip_grad: ${policy.max_grad_norm} + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + 
end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: 1000 + lr_warmup_iters: 10 + lr_warmup_init: 2.0e-6 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + generation: + backend: "vllm" + max_new_tokens: ${..max_total_sequence_length} # refer to local policy/teacher config + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + async_engine: false + precision: ${...precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + expert_parallel_size: 1 # When EP > 1, EP must be a multiple of TP since vLLM's EP = DP * TP + gpu_memory_utilization: 0.6 + max_model_len: ${...max_total_sequence_length} # refer to local policy/teacher config + enforce_eager: False + use_deep_gemm: False + num_last_layers_in_bf16: 0 + num_first_layers_in_bf16: 0 + distributed_executor_backend: null + + colocated: + # true: generation shares training GPUs + # false: uses dedicated generation resources + enabled: true + # only relevant when enabled is false + resources: + gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 + num_nodes: null # Decides number of nodes to be dedicated to generation + +teacher: + <<: *POLICY_BASE + model_name: "Qwen/Qwen3-4B" + megatron_cfg: + <<: *MEGATRON_BASE + context_parallel_size: 2 + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 2 + +logger: + wandb_enabled: true + wandb: + project: "nemo-distillation" + name: "distillation-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}" + tensorboard: + log_dir: 
"tb_logs-distillation-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}" + mlflow: + run_name: "distillation-math-megatron-${data.dataset_name}-${teacher.model_name}-${policy.model_name}-${loss_fn.kl_type}-${distillation.topk_logits_k}" + +cluster: + gpus_per_node: 8 + num_nodes: 1 diff --git a/examples/configs/dpo.yaml b/examples/configs/dpo.yaml index 4524338e4f..d142c116f1 100755 --- a/examples/configs/dpo.yaml +++ b/examples/configs/dpo.yaml @@ -22,15 +22,17 @@ dpo: checkpointing: enabled: true checkpoint_dir: "results/dpo" - metric_name: "val_loss" + metric_name: "val:validation-default_loss" higher_is_better: false keep_top_k: 3 save_period: 50 + checkpoint_must_save_by: null policy: model_name: "meta-llama/Llama-3.2-1B-Instruct" tokenizer: name: "meta-llama/Llama-3.2-1B-Instruct" + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true # number of preference samples per batch # each preference sample corresponds to a pair of chosen and rejected responses @@ -43,7 +45,11 @@ policy: max_total_sequence_length: 1024 precision: "bfloat16" + offload_optimizer_for_logprob: false + dtensor_cfg: + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "" # Refers to https://docs.pytorch.org/docs/stable/notes/cuda.html#optimizing-memory-usage-with-pytorch-cuda-alloc-conf enabled: true cpu_offload: False sequence_parallel: false @@ -51,6 +57,7 @@ policy: tensor_parallel_size: 1 context_parallel_size: 1 custom_parallel_plan: null + clear_cache_every_n_steps: null dynamic_batching: enabled: false @@ -105,8 +112,12 @@ policy: moe_router_dtype: "fp64" moe_router_load_balancing_type: "aux_loss" moe_router_bias_update_rate: 1e-3 + moe_permute_fusion: false #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: False 
optimizer: optimizer: "adam" @@ -131,32 +142,67 @@ policy: clip_grad: ${policy.max_grad_norm} + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + scheduler: start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" - lr_decay_style: "linear" - lr_decay_iters: 1000000000 - lr_warmup_iters: 2 + lr_decay_style: "constant" + lr_warmup_iters: 1 lr_warmup_init: 0.00000001 distributed_data_parallel_config: grad_reduce_in_fp32: false overlap_grad_reduce: true overlap_param_gather: true - average_in_collective: true data_parallel_sharding_strategy: "optim_grads_params" + use_custom_fsdp: false data: - dataset_name: "HelpSteer3" max_input_seq_length: ${policy.max_total_sequence_length} + shuffle: true + num_workers: 1 + + dataset_name: HelpSteer3 + # You can use custom preference datasets for training and validation. For example: + # 1. PreferenceDataset + # data: + # dataset_name: PreferenceDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_paths: + # <NameOfValidationDataset1>: <PathToValidationDataset1> + # ... + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # 2. 
BinaryPreferenceDataset + # data: + # dataset_name: BinaryPreferenceDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_path: <PathToValidationDataset> + # prompt_key: <PromptKey>, default is "prompt" + # chosen_key: <ChosenKey>, default is "chosen" + # rejected_key: <RejectedKey>, default is "rejected" + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/dpo.md#datasets for more details. + + # If you are doing checkpointing, `metric_name` should reflect the metric and validation set to be tracked. For example: + # checkpointing: + # metric_name: "val:validation-<NameOfValidationDataset1>_loss" + # ... + logger: log_dir: "logs" # Base directory for all logs wandb_enabled: false # Make sure you do a ``wandb login [Your API key]'' before running + tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal + num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: "dpo-dev" name: "dpo" diff --git a/examples/configs/evals/eval.yaml b/examples/configs/evals/eval.yaml index e642ad59df..6546219b4b 100644 --- a/examples/configs/evals/eval.yaml +++ b/examples/configs/evals/eval.yaml @@ -1,9 +1,9 @@ # Evaluation Configuration eval: - metric: "pass@k" + metric: "pass@k" # pass@k and cons@k are supported num_tests_per_prompt: 1 # every prompt will be tested num_tests_per_prompt times and use the average score as the final score seed: 42 - pass_k_value: 1 + k_value: 1 save_path: null # Path to save evaluation results and configuration of the 
evaluation. Set to null to disable saving. Example: "results/eval_output" or "/path/to/evaluation_results" generation: @@ -21,6 +21,7 @@ generation: precision: "bfloat16" tensor_parallel_size: 1 pipeline_parallel_size: 1 + expert_parallel_size: 1 gpu_memory_utilization: 0.9 max_model_len: 2048 enforce_eager: False @@ -37,6 +38,7 @@ generation: tokenizer: name: ${generation.model_name} ## specify if you'd like to use a tokenizer different from the model's default chat_template: "default" + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true data: max_input_seq_length: ${generation.vllm_cfg.max_model_len} # useless since we directly use prompts in evaluation diff --git a/examples/configs/evals/gpqa_eval.yaml b/examples/configs/evals/gpqa_eval.yaml index 463702d3a4..7e7bc9cda5 100644 --- a/examples/configs/evals/gpqa_eval.yaml +++ b/examples/configs/evals/gpqa_eval.yaml @@ -12,4 +12,4 @@ data: env: math: - verifier_type: "multichoice" + verifier_type: "multilingual_multichoice" diff --git a/examples/configs/evals/local_eval.yaml b/examples/configs/evals/local_eval.yaml index ad9def2112..bcb10c66ea 100644 --- a/examples/configs/evals/local_eval.yaml +++ b/examples/configs/evals/local_eval.yaml @@ -1,14 +1,19 @@ # Evaluation Configuration from local files. +# Other settings (e.g., eval metrics, vLLM, cluster, etc.) are inherited from examples/configs/evals/eval.yaml. defaults: "eval.yaml" generation: model_name: "Qwen/Qwen2.5-7B-Instruct" + vllm_cfg: + max_model_len: 2048 data: prompt_file: "examples/prompts/cot.txt" - dataset_name: "local" + system_prompt_file: null + # You can also use custom datasets from a local dataset or HuggingFace. 
+ # e.g., /path/to/local/dataset or hf_org/hf_dataset_name (HuggingFace) + dataset_name: "https://openaipublic.blob.core.windows.net/simple-evals/math_500_test.csv" problem_key: "Question" solution_key: "Answer" split: "train" - data_paths: "https:\/\/openaipublic.blob.core.windows.net\/simple-evals\/math_500_test.csv" file_format: "csv" diff --git a/examples/configs/evals/mmlu.yaml b/examples/configs/evals/mmlu.yaml index 179e54d9fb..3b4968fa8f 100644 --- a/examples/configs/evals/mmlu.yaml +++ b/examples/configs/evals/mmlu.yaml @@ -10,4 +10,4 @@ data: env: math: - verifier_type: "multichoice" + verifier_type: "multilingual_multichoice" diff --git a/examples/configs/evals/mmlu_zh_cn.yaml b/examples/configs/evals/mmlu_zh_cn.yaml index ee3cd9bc30..3fe9dedda9 100644 --- a/examples/configs/evals/mmlu_zh_cn.yaml +++ b/examples/configs/evals/mmlu_zh_cn.yaml @@ -3,4 +3,3 @@ defaults: "mmlu.yaml" data: dataset_name: "mmlu_ZH-CN" - diff --git a/examples/configs/grpo-deepscaler-1.5b-16K.yaml b/examples/configs/grpo-deepscaler-1.5b-16K.yaml deleted file mode 100644 index 575db2f538..0000000000 --- a/examples/configs/grpo-deepscaler-1.5b-16K.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# GRPO Algorithm Configuration -defaults: "grpo-deepscaler-1.5b-8K.yaml" - -loss_fn: - reference_policy_kl_penalty: 0.001 - ratio_clip_max: 0.28 - - -policy: - max_total_sequence_length: 16384 - - - dynamic_batching: - enabled: False \ No newline at end of file diff --git a/examples/configs/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/grpo-deepscaler-1.5b-8K.yaml deleted file mode 100644 index 576494b9b9..0000000000 --- a/examples/configs/grpo-deepscaler-1.5b-8K.yaml +++ /dev/null @@ -1,146 +0,0 @@ -# GRPO Algorithm Configuration -grpo: - num_prompts_per_step: 128 - num_generations_per_prompt: 8 - max_rollout_turns: 1 # for multi-turn rollouts. 
Math Environments just have 1 turn (answering the question) - max_num_steps: 1000000 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 480 - val_batch_size: 32 - -loss_fn: - reference_policy_kl_penalty: 0.0 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - # (default off) loss formulation improvements (docs/guides/grpo.md#loss) - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true - -checkpointing: - enabled: true - checkpoint_dir: "results/grpo" - metric_name: "val_reward" - higher_is_better: true - keep_top_k: 10 - save_period: 10 - -policy: - # Qwen/Qwen2.5-1.5B has tied weights which are only supported with dtensor policy with tp size 1 (https://github.com/NVIDIA-NeMo/RL/issues/227) - model_name: "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - tokenizer: - name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default - train_global_batch_size: 64 - train_micro_batch_size: 1 - generation_batch_size: 32 # Only used when generating using HF backend - logprob_batch_size: 4 - max_total_sequence_length: 8192 - precision: "bfloat16" - - dtensor_cfg: - enabled: true - cpu_offload: False - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: False - - sequence_packing: - enabled: False - - # makes the training sequence length divisible by the tensor parallel size - # this is useful for sequence parallel training - make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 2.0e-6 - weight_decay: 0.01 - betas: [0.9, 0.999] - eps: 1e-8 - # when using Dtensor, we need to set foreach - # and fused to False - foreach: False - fused: False - - scheduler: - - name: 
"torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.1 - end_factor: 1.0 - total_iters: 50 - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - milestones: [50] - - generation: - backend: "vllm" - max_new_tokens: ${policy.max_total_sequence_length} - temperature: 1.0 - top_p: 1.0 - top_k: null - stop_token_ids: null - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: ${policy.max_total_sequence_length} - enforce_eager: False - # For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit - # For Gemma models, we need to use "auto" due to a vllm bug - load_format: dummy - colocated: - # true: generation shares training GPUs - # false: uses dedicated generation resources - enabled: true - # only relevant when enabled is false - resources: - gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 - num_nodes: null # Decides number of nodes to be dedicated to generation - -data: - max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len - prompt_file: "examples/prompts/cot.txt" - system_prompt_file: null - dataset_name: "DeepScaler" - -env: - math: - num_workers: 16 - -logger: - log_dir: "logs" # Base directory for all logs - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb_enabled: false - tensorboard_enabled: false - mlflow_enabled: false - monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard - wandb: - project: "grpo-dev" - name: "grpo-dev-logger" - tensorboard: {} - mlflow: - experiment_name: "grpo-dev" - run_name: "grpo-dev-logger" - gpu_monitoring: - collection_interval: 10 # How often to collect GPU usage 
metrics (in seconds) - flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) - -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/grpo_deepscaler-1.5b-24K.yaml b/examples/configs/grpo_deepscaler-1.5b-24K.yaml deleted file mode 100644 index dc9db4ceab..0000000000 --- a/examples/configs/grpo_deepscaler-1.5b-24K.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# GRPO Algorithm Configuration -defaults: "grpo-deepscaler-1.5b-8K.yaml" - -loss_fn: - reference_policy_kl_penalty: 0.0001 - ratio_clip_min: 0.2 - ratio_clip_max: 0.28 - -policy: - max_total_sequence_length: 24576 - - dtensor_cfg: - enabled: true - cpu_offload: true - sequence_parallel: true - activation_checkpointing: true - tensor_parallel_size: 4 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: False - - sequence_packing: - enabled: False - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 5.0e-7 - - generation: - backend: "vllm" - max_new_tokens: ${policy.max_total_sequence_length} - temperature: 1.0 - top_p: 1.0 - top_k: null - stop_token_ids: null - stop_strings: null - vllm_cfg: - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.8 - max_model_len: ${policy.max_total_sequence_length} - # For most cases, use "dummy" to load the initial weights, since they will be overwritten during refit - # For Gemma models, we need to use "auto" due to a vllm bug - load_format: dummy diff --git a/examples/configs/grpo_math_1B.yaml b/examples/configs/grpo_math_1B.yaml index b9be32bdda..50124bb71e 100644 --- a/examples/configs/grpo_math_1B.yaml +++ b/examples/configs/grpo_math_1B.yaml @@ -3,45 +3,85 @@ grpo: num_prompts_per_step: 32 num_generations_per_prompt: 16 max_rollout_turns: 1 # for multi-turn rollouts. 
Math Environments just have 1 turn (answering the question) + max_num_epochs: 1 max_num_steps: 1000000 normalize_rewards: true use_leave_one_out_baseline: true val_period: 10 val_at_start: false + overlong_filtering: false max_val_samples: 256 val_batch_size: 256 + seed: 42 + use_dynamic_sampling: false + dynamic_sampling_max_gen_batches: 10 + batch_multiplier: 1 + reward_shaping: + enabled: false + overlong_buffer_length: 128 + overlong_buffer_penalty: 1 + max_response_length: ${policy.max_total_sequence_length} + reward_scaling: + enabled: false + source_min: 0.0 + source_max: 1.0 + target_min: 0.0 + target_max: 1.0 + + async_grpo: + enabled: false # Set to true to enable async training mode + # Max age (in training steps) for trajectories used in training + max_trajectory_age_steps: 1 + in_flight_weight_updates: false # Set to true to enable in-flight weight updates + recompute_kv_cache_after_weight_updates: false # Set to true to recompute kv cache after in-flight-weight-updates loss_fn: reference_policy_kl_penalty: 0.01 + # Can be set to k1, k2, k3 + # For more details, see http://joschu.net/blog/kl-approx.html + reference_policy_kl_type: "k3" + kl_input_clamp_value: 20.0 + kl_output_clamp_value: 10.0 ratio_clip_min: 0.2 ratio_clip_max: 0.2 ratio_clip_c: null # (default off) loss formulation improvements (docs/guides/grpo.md#loss) use_on_policy_kl_approximation: false + # Async GRPO requires importance sampling correction enabled + # Set to true when async_grpo.enabled is true use_importance_sampling_correction: false + truncated_importance_sampling_ratio: null + sequence_level_importance_ratios: false token_level_loss: true checkpointing: enabled: true checkpoint_dir: "results/grpo" - metric_name: "val_reward" + metric_name: "val:accuracy" # one of "val:" or "train:" followed by the metric name higher_is_better: true keep_top_k: 3 save_period: 10 + checkpoint_must_save_by: null + model_save_format: "safetensors" + save_consolidated: false policy: - # 
Qwen/Qwen2.5-1.5B has tied weights which are only supported with dtensor policy with tp size 1 (https://github.com/NVIDIA-NeMo/RL/issues/227) model_name: "Qwen/Qwen2.5-1.5B" tokenizer: name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true + hf_config_overrides: {} train_global_batch_size: 512 train_micro_batch_size: 4 generation_batch_size: 32 # Only used when generating using HF backend logprob_batch_size: 4 max_total_sequence_length: 512 precision: "bfloat16" + logprob_chunk_size: null + offload_optimizer_for_logprob: false # Only useful for non-colocated generation since colocated generation will always offload optimizer to cuda before refit dtensor_cfg: + _v2: true enabled: true cpu_offload: False sequence_parallel: false @@ -52,6 +92,75 @@ policy: megatron_cfg: enabled: false + empty_unused_memory_level: 1 # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory. 
+ activation_checkpointing: false + converter_type: "Qwen2ForCausalLM" + tensor_model_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 1 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: False + + optimizer: + optimizer: "adam" + lr: 5.0e-6 + min_lr: 5.0e-7 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + clip_grad: ${policy.max_grad_norm} + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: 1000 + lr_warmup_iters: 13 + lr_warmup_init: 5.0e-7 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + fp8_cfg: null + + env_vars: null # See docs/design-docs/sequence-packing-and-dynamic-batching.md # for more details on dynamic batching and sequence packing. 
@@ -110,9 +219,17 @@ policy: precision: ${policy.precision} tensor_parallel_size: 1 pipeline_parallel_size: 1 + expert_parallel_size: 1 # When EP > 1, EP must be a multiple of TP since vLLM's EP = DP * TP gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} + # when enforce_eager is False, it is optional to set ++policy.generation.vllm_kwargs.compilation_config.use_inductor=False for better accuracy, + # with the flag, vllm will use the custom CUDA kernels instead of the Triton kernels generated by torch.compile + # for more details, see convergence issue https://github.com/NVIDIA-NeMo/RL/issues/998 enforce_eager: False + use_deep_gemm: False + num_last_layers_in_bf16: 0 + num_first_layers_in_bf16: 0 + vllm_kwargs: {} colocated: # true: generation shares training GPUs # false: uses dedicated generation resources @@ -126,11 +243,29 @@ data: max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len prompt_file: "examples/prompts/cot.txt" system_prompt_file: null + shuffle: true + num_workers: 1 + dataset_name: "OpenMathInstruct-2" + # You can use custom response datasets for training and validation. For example: + # data: + # dataset_name: ResponseDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_path: <PathToValidationDataset> + # input_key: <QuestionKey>, default is "input" + # output_key: <AnswerKey>, default is "output" + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/grpo.md#datasets for more details. 
env: math: num_workers: 8 + math_verify_impl: "hf_math_verify" + ## unused in this config but needed for DAPO recipe + dapo: + num_workers: 8 + math_verify_impl: "dapo_math_verify" logger: log_dir: "logs" # Base directory for all logs @@ -138,6 +273,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/grpo_math_1B_megatron.yaml b/examples/configs/grpo_math_1B_megatron.yaml index cf6ba44d75..58ecbc7787 100644 --- a/examples/configs/grpo_math_1B_megatron.yaml +++ b/examples/configs/grpo_math_1B_megatron.yaml @@ -4,6 +4,7 @@ defaults: "grpo_math_1B.yaml" grpo: num_prompts_per_step: 32 num_generations_per_prompt: 16 + max_num_epochs: 1 max_num_steps: 1000000 normalize_rewards: true use_leave_one_out_baseline: true @@ -11,6 +12,9 @@ grpo: val_at_start: false max_val_samples: 256 val_batch_size: 256 + async_grpo: + enabled: false + max_trajectory_age_steps: 1 loss_fn: reference_policy_kl_penalty: 0.01 @@ -25,15 +29,17 @@ loss_fn: checkpointing: enabled: false checkpoint_dir: "results/grpo_megatron" - metric_name: "val_reward" + metric_name: "val:accuracy" # one of "val:" or "train:" followed by the metric name higher_is_better: true keep_top_k: 3 save_period: 10 + checkpoint_must_save_by: null policy: model_name: "Qwen/Qwen2.5-1.5B" tokenizer: name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true train_global_batch_size: 512 train_micro_batch_size: 4 generation_batch_size: 64 # Only used when generating using megatron backend @@ -71,7 +77,7 @@ policy: megatron_cfg: enabled: true - empty_unused_memory_level: 0 + empty_unused_memory_level: 1 # 1 is the minimum 
recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory. activation_checkpointing: false converter_type: "Qwen2ForCausalLM" tensor_model_parallel_size: 1 @@ -87,6 +93,7 @@ policy: moe_router_dtype: "fp64" moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false #gives ~20% training perf speedup with sequence packing apply_rope_fusion: True @@ -118,7 +125,7 @@ policy: end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" lr_decay_style: "constant" - lr_decay_iters: null + lr_decay_iters: 1000 lr_warmup_iters: 13 lr_warmup_init: 5.0e-7 @@ -126,7 +133,6 @@ policy: grad_reduce_in_fp32: false overlap_grad_reduce: true overlap_param_gather: true - average_in_collective: true use_custom_fsdp: false data_parallel_sharding_strategy: "optim_grads_params" @@ -146,10 +152,12 @@ data: prompt_file: "examples/prompts/cot.txt" system_prompt_file: null dataset_name: "OpenMathInstruct-2" + shuffle: true env: math: num_workers: 8 + math_verify_impl: "hf_math_verify" logger: log_dir: "logs" # Base directory for all logs @@ -157,6 +165,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" diff --git a/examples/configs/grpo_math_70B_megatron.yaml b/examples/configs/grpo_math_70B_megatron.yaml index 5d55df99e5..f29f24d111 100644 --- a/examples/configs/grpo_math_70B_megatron.yaml +++ b/examples/configs/grpo_math_70B_megatron.yaml @@ -46,7 +46,7 @@ policy: end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" lr_decay_style: 
"constant" - lr_decay_iters: null + lr_decay_iters: 1000 lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 @@ -59,7 +59,7 @@ policy: stop_token_ids: null stop_strings: null vllm_cfg: - tensor_parallel_size: 4 + tensor_parallel_size: 8 gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} diff --git a/examples/configs/grpo_math_70B_megatron_fp8.yaml b/examples/configs/grpo_math_70B_megatron_fp8.yaml new file mode 100644 index 0000000000..df239cd8ff --- /dev/null +++ b/examples/configs/grpo_math_70B_megatron_fp8.yaml @@ -0,0 +1,22 @@ +# GRPO Algorithm Configuration +defaults: "grpo_math_70B_megatron.yaml" + +loss_fn: + use_importance_sampling_correction: true + +policy: + generation: + vllm_cfg: + precision: "fp8" + use_deep_gemm: true + megatron_cfg: + pipeline_model_parallel_size: 8 + fp8_cfg: + enabled: true + fp8: "e4m3" + fp8_recipe: "blockwise" + fp8_param: false + optimizer: + use_precision_aware_optimizer: false + env_vars: + NVTE_FP8_BLOCK_SCALING_FP32_SCALES: "1" \ No newline at end of file diff --git a/examples/configs/grpo_math_8B.yaml b/examples/configs/grpo_math_8B.yaml index ce46263375..51331ec509 100644 --- a/examples/configs/grpo_math_8B.yaml +++ b/examples/configs/grpo_math_8B.yaml @@ -4,6 +4,9 @@ defaults: "grpo_math_1B.yaml" grpo: num_prompts_per_step: 64 num_generations_per_prompt: 32 + async_grpo: + enabled: false + max_trajectory_age_steps: 1 policy: model_name: "meta-llama/Llama-3.1-8B-Instruct" diff --git a/examples/configs/grpo_math_8B_megatron.yaml b/examples/configs/grpo_math_8B_megatron.yaml index 3f68344417..977ab394b5 100644 --- a/examples/configs/grpo_math_8B_megatron.yaml +++ b/examples/configs/grpo_math_8B_megatron.yaml @@ -8,6 +8,7 @@ grpo: checkpointing: enabled: false checkpoint_dir: "results/grpo_8b_megatron" + checkpoint_must_save_by: null policy: model_name: "meta-llama/Llama-3.1-8B-Instruct" @@ -29,7 +30,7 @@ policy: megatron_cfg: enabled: true - empty_unused_memory_level: 0 + empty_unused_memory_level: 1 # 1 
is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory. converter_type: "LlamaForCausalLM" tensor_model_parallel_size: 1 # On H100, can run with pp=1 for better performance with expandable segments (which is enabled by default) @@ -51,7 +52,7 @@ policy: end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" lr_decay_style: "constant" - lr_decay_iters: null + lr_decay_iters: 1000 lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 diff --git a/examples/configs/grpo_math_8B_megatron_fp8.yaml b/examples/configs/grpo_math_8B_megatron_fp8.yaml new file mode 100644 index 0000000000..ba6ee6e5c8 --- /dev/null +++ b/examples/configs/grpo_math_8B_megatron_fp8.yaml @@ -0,0 +1,22 @@ +# GRPO Algorithm Configuration +defaults: "grpo_math_8B_megatron.yaml" + +loss_fn: + use_importance_sampling_correction: true + +policy: + generation: + vllm_cfg: + precision: "fp8" + use_deep_gemm: true + gpu_memory_utilization: 0.5 + megatron_cfg: + fp8_cfg: + enabled: true + fp8: "e4m3" + fp8_recipe: "blockwise" + fp8_param: false + optimizer: + use_precision_aware_optimizer: false + env_vars: + NVTE_FP8_BLOCK_SCALING_FP32_SCALES: "1" \ No newline at end of file diff --git a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml index e7cae09858..37616e32b0 100644 --- a/examples/configs/grpo_math_qwen30ba3b_megatron.yaml +++ b/examples/configs/grpo_math_qwen30ba3b_megatron.yaml @@ -52,10 +52,10 @@ policy: end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" lr_decay_style: "constant" - lr_decay_iters: null + lr_decay_iters: 1000 lr_warmup_iters: 13 lr_warmup_init: 3.0e-8 - + env_vars: PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" diff --git a/examples/configs/grpo_rm_1B.yaml b/examples/configs/grpo_rm_1B.yaml new file mode 100644 index 
0000000000..cffe3d83a7 --- /dev/null +++ b/examples/configs/grpo_rm_1B.yaml @@ -0,0 +1,39 @@ +# GRPO Algorithm Configuration +defaults: "grpo_math_1B.yaml" + +env: + reward_model: + enabled: true + model_name: "Skywork/Skywork-Reward-V2-Qwen3-0.6B" + tokenizer: + name: ${env.reward_model.model_name} + precision: "bfloat16" + batch_size: ${policy.train_micro_batch_size} + checkpoint_path: null + max_model_len: 2048 + resources: + gpus_per_node: 1 + num_nodes: 1 + # TODO: Mcore path support https://github.com/NVIDIA-NeMo/RL/issues/1154 + dtensor_cfg: + _v2: true + enabled: true + sequence_parallel: false + tensor_parallel_size: 1 + context_parallel_size: 1 + custom_parallel_plan: null + cpu_offload: false + activation_checkpointing: false + reward_model_cfg: + enabled: true + reward_model_type: "bradley_terry" + dynamic_batching: + enabled: false + sequence_packing: + enabled: false + max_grad_norm: null + + +cluster: + gpus_per_node: 2 + num_nodes: 1 diff --git a/examples/configs/grpo_sliding_puzzle.yaml b/examples/configs/grpo_sliding_puzzle.yaml index aeb6b48da4..842318b133 100644 --- a/examples/configs/grpo_sliding_puzzle.yaml +++ b/examples/configs/grpo_sliding_puzzle.yaml @@ -6,18 +6,26 @@ grpo: num_generations_per_prompt: 16 max_rollout_turns: 50 # Maximum turns allowed per rollout max_num_steps: 10000 + max_num_epochs: 1 checkpointing: enabled: true checkpoint_dir: "results/grpo-sliding-puzzle" - metric_name: "val_reward" + metric_name: "val:accuracy" # one of "val:" or "train:" followed by the metric name higher_is_better: true keep_top_k: 3 save_period: 10 + checkpoint_must_save_by: null policy: model_name: "Qwen/Qwen2.5-1.5B-Instruct" - max_total_sequence_length: 3072 + max_total_sequence_length: 1024 + + dtensor_cfg: + enabled: true + cpu_offload: true + activation_checkpointing: true + sequence_parallel: true generation: backend: "vllm" @@ -33,19 +41,21 @@ policy: async_engine: false tensor_parallel_size: 1 pipeline_parallel_size: 1 + 
expert_parallel_size: 1 gpu_memory_utilization: 0.6 max_model_len: ${policy.max_total_sequence_length} data: add_system_prompt: false + shuffle: false # disable dataloader shuffle, shuffle is handled within the dataset env: sliding_puzzle_game: cfg: game_config: size: 5 # Size of the puzzle (e.g., 2 for 2x2, 3 for 3x3) - shuffle_moves: 15 # Number of random moves to shuffle the solved state - max_moves: 50 # Maximum moves allowed per episode + shuffle_moves: 10 # Number of random moves to shuffle the solved state + max_moves: 30 # Maximum moves allowed per episode logger: log_dir: "logs" # Base directory for all logs @@ -53,6 +63,7 @@ logger: wandb_enabled: false tensorboard_enabled: false mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard wandb: project: "grpo-dev" @@ -63,4 +74,4 @@ logger: run_name: "grpo-dev-sliding_puzzle" gpu_monitoring: collection_interval: 10 # How often to collect GPU usage metrics (in seconds) - flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) \ No newline at end of file diff --git a/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml b/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml new file mode 100644 index 0000000000..f3ddd4675c --- /dev/null +++ b/examples/configs/recipes/llm/dapo-qwen2.5-7b.yaml @@ -0,0 +1,104 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 512 + num_generations_per_prompt: 16 + batch_multiplier: 3 # Multiplier for dataloader batch size calculation (batch_multiplier × num_prompts_per_step). Following DAPO dynamic sampling, the actual training batch size equals num_prompts_per_step × num_generations_per_prompt. + max_rollout_turns: 1 # for multi-turn rollouts. 
Math Environments just have 1 turn (answering the question) + max_num_steps: 10000 + use_leave_one_out_baseline: false + val_period: 20 + max_val_samples: 960 + val_batch_size: 960 + use_dynamic_sampling: true + dynamic_sampling_max_gen_batches: 10 + reward_scaling: + enabled: true + source_min: 0.0 + source_max: 1.0 + target_min: -1.0 + target_max: 1.0 + reward_shaping: + enabled: true + overlong_buffer_length: 2048 + max_response_length: 14336 +loss_fn: + reference_policy_kl_penalty: 0.0 + ratio_clip_max: 0.28 + ratio_clip_c: 10 +checkpointing: + checkpoint_dir: results/dapo-qwen2.5-7b + keep_top_k: 5 + save_period: 5 + model_save_format: "dcp" +policy: + model_name: Qwen/Qwen2.5-Math-7B + hf_config_overrides: + max_position_embeddings: 16384 + train_micro_batch_size: 1 + logprob_batch_size: 1 + max_total_sequence_length: 16384 + dtensor_cfg: + _v2: false + context_parallel_size: 4 + megatron_cfg: + empty_unused_memory_level: 1 + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 2 + context_parallel_size: 2 + sequence_parallel: true + optimizer: + lr: 1.0e-06 + min_lr: 1.0e-06 + weight_decay: 0.1 + scheduler: + lr_decay_iters: null + lr_warmup_iters: 10 + lr_warmup_init: 1.0e-07 + sequence_packing: + enabled: false + make_sequence_length_divisible_by: ${mul:${policy.dtensor_cfg.tensor_parallel_size}, + ${mul:2, ${policy.dtensor_cfg.context_parallel_size}}} + optimizer: + kwargs: + lr: 1.0e-06 + weight_decay: 0.1 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 1.0e-08 + end_factor: 1.0 + total_iters: 10 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 10 + generation: + max_new_tokens: 16384 + vllm_cfg: + tensor_parallel_size: 2 + gpu_memory_utilization: 0.7 + enforce_eager: true +data: + max_input_seq_length: 2048 + prompt_file: null + dataset_name: DAPOMath17K +env: + dapo: + num_workers: 16 + math: + num_workers: 16 + math_verify_impl: 
"dapo_math_verify" + +logger: + monitor_gpus: false + wandb: + project: dapo-dev + name: dapo-dev-logger + mlflow: + experiment_name: dapo-dev + run_name: dapo-dev-logger +cluster: + gpus_per_node: 8 + num_nodes: 16 diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.yaml new file mode 100644 index 0000000000..93a5b69ccd --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.yaml @@ -0,0 +1,58 @@ +defaults: ../../distillation_math.yaml +distillation: + num_prompts_per_step: 64 + max_num_steps: 20 + val_batch_size: 32 + val_period: 10 + max_val_samples: 256 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-1.7b-base +policy: + train_global_batch_size: 32 + generation_batch_size: 32 + dtensor_cfg: + tensor_parallel_size: 1 + context_parallel_size: 1 + dynamic_batching: + enabled: false + make_sequence_length_divisible_by: 1 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 20 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 20 +teacher: + model_name: Qwen/Qwen3-32B + train_global_batch_size: 32 + generation_batch_size: 32 + dtensor_cfg: + context_parallel_size: 1 + dynamic_batching: + enabled: false + make_sequence_length_divisible_by: 1 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 20 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 20 +logger: + log_dir: logs/distillation-qwen3-32b-to-1.7b-base + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-1.7b-base diff --git 
a/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.yaml new file mode 100644 index 0000000000..6fda3fe24e --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.yaml @@ -0,0 +1,41 @@ +defaults: ../../distillation_math.yaml +distillation: + num_prompts_per_step: 32 + max_num_steps: 20 + val_batch_size: 32 + val_period: 10 + max_val_samples: 256 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-1.7b-base-megatron-tp2pp2cp2-pack +policy: + train_global_batch_size: 32 + generation_batch_size: 32 + dtensor_cfg: + enabled: false + dynamic_batching: + enabled: false + sequence_packing: + enabled: true + make_sequence_length_divisible_by: ${mul:${mul:${.megatron_cfg.tensor_model_parallel_size}, + ${.megatron_cfg.context_parallel_size}}, 2} + megatron_cfg: + enabled: true +teacher: + model_name: Qwen/Qwen3-32B + dtensor_cfg: + enabled: false + dynamic_batching: + enabled: false + sequence_packing: + enabled: true + megatron_cfg: + enabled: true + tensor_model_parallel_size: 4 + context_parallel_size: 1 +logger: + log_dir: logs/distillation-qwen3-32b-to-1.7b-base-megatron-tp2pp2cp2-pack + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-1.7b-base-megatron-tp2pp2cp2-pack diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml new file mode 100644 index 0000000000..f9551a954e --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.yaml @@ -0,0 +1,27 @@ +defaults: ../../distillation_math.yaml +distillation: + num_prompts_per_step: 64 + max_num_steps: 20 + val_batch_size: 256 + val_period: 10 + 
max_val_samples: 256 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-dynamicbatch +policy: + model_name: Qwen/Qwen3-4B-Base + dtensor_cfg: + context_parallel_size: 1 + make_sequence_length_divisible_by: 2 +teacher: + model_name: Qwen/Qwen3-32B + dtensor_cfg: + tensor_parallel_size: 8 + context_parallel_size: 1 + make_sequence_length_divisible_by: 2 +logger: + log_dir: logs/distillation-qwen3-32b-to-4b-base-dynamicbatch + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-4b-base-dynamicbatch diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml new file mode 100644 index 0000000000..6496b11c2c --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.yaml @@ -0,0 +1,26 @@ +defaults: ../../distillation_math.yaml +distillation: + num_prompts_per_step: 64 + max_num_steps: 500 + val_batch_size: 512 + val_period: 50 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-long +policy: + model_name: Qwen/Qwen3-4B-Base + max_total_sequence_length: 20480 + generation: + vllm_cfg: + tensor_parallel_size: 2 +teacher: + model_name: Qwen/Qwen3-32B + max_total_sequence_length: 20480 +logger: + log_dir: logs/distillation-qwen3-32b-to-4b-base-long + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-4b-base-long +cluster: + num_nodes: 2 diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml new file mode 100644 index 0000000000..9d7b8746dc --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.yaml @@ -0,0 +1,37 @@ +defaults: ../../distillation_math.yaml +distillation: + 
num_prompts_per_step: 64 + max_num_steps: 20 + val_batch_size: 256 + val_period: 10 + max_val_samples: 256 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-seqpack +policy: + model_name: Qwen/Qwen3-4B-Base + dtensor_cfg: + context_parallel_size: 1 + dynamic_batching: + enabled: false + sequence_packing: + enabled: true + make_sequence_length_divisible_by: 2 +teacher: + model_name: Qwen/Qwen3-32B + dtensor_cfg: + tensor_parallel_size: 8 + context_parallel_size: 1 + dynamic_batching: + enabled: false + sequence_packing: + enabled: true + make_sequence_length_divisible_by: 2 +logger: + log_dir: logs/distillation-qwen3-32b-to-4b-base-seqpack + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-4b-base-seqpack +cluster: + num_nodes: 2 diff --git a/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml new file mode 100644 index 0000000000..8f1d235d69 --- /dev/null +++ b/examples/configs/recipes/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.yaml @@ -0,0 +1,43 @@ +defaults: ../../distillation_math.yaml +distillation: + num_prompts_per_step: 64 + max_num_steps: 20 + val_batch_size: 256 + val_period: 10 + max_val_samples: 256 +loss_fn: + kl_type: reverse +checkpointing: + checkpoint_dir: checkpoints/distillation-qwen3-32b-to-4b-base-noncolocated + save_period: 50 +policy: + model_name: Qwen/Qwen3-4B-Base + dtensor_cfg: + tensor_parallel_size: 8 + context_parallel_size: 1 + make_sequence_length_divisible_by: 2 + generation: + colocated: + enabled: false + resources: + gpus_per_node: 8 + num_nodes: 1 +teacher: + model_name: Qwen/Qwen3-32B + dtensor_cfg: + tensor_parallel_size: 8 + context_parallel_size: 1 + make_sequence_length_divisible_by: 2 + generation: + colocated: + enabled: false + resources: + gpus_per_node: 8 + num_nodes: 1 +logger: + 
log_dir: logs/distillation-qwen3-32b-to-4b-base-noncolocated + wandb: + project: nemo-rl + name: distillation-qwen3-32b-to-4b-base-noncolocated +cluster: + num_nodes: 2 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml deleted file mode 100644 index e7eaef706a..0000000000 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.yaml +++ /dev/null @@ -1,94 +0,0 @@ -dpo: - max_num_epochs: 1 - max_num_steps: 150 - val_period: 50 - val_batches: 16 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: false - seed: 42 - - reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 - sft_loss_weight: 0.01 - -checkpointing: - enabled: true - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 - save_period: 10000 - -policy: - model_name: "meta-llama/Llama-3.1-8B-Instruct" - tokenizer: - name: ${policy.model_name} - train_global_batch_size: 256 - train_micro_batch_size: 1 - max_total_sequence_length: 2048 - precision: "bfloat16" - dtensor_cfg: - enabled: true - cpu_offload: False - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: false - - sequence_packing: - enabled: false - - make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 5.0e-6 - weight_decay: 0.1 - betas: [0.9, 0.98] - eps: 1e-8 - foreach: False - fused: False - - scheduler: - - name: "torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.000000001 - end_factor: 1.0 - total_iters: 1 - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - 
milestones: [1] - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} - -logger: - log_dir: "logs" - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml index 4906550001..18a84b9cee 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml @@ -1,94 +1,44 @@ +defaults: ../../dpo.yaml dpo: max_num_epochs: 2 max_num_steps: 20 val_period: 50 val_batches: 16 val_global_batch_size: 32 - val_micro_batch_size: 1 val_at_start: false - seed: 42 - - reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 sft_loss_weight: 0.01 - checkpointing: - enabled: true - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 save_period: 10000 - policy: - model_name: "meta-llama/Llama-3.1-8B-Instruct" + model_name: meta-llama/Llama-3.1-8B-Instruct tokenizer: name: ${policy.model_name} train_global_batch_size: 256 train_micro_batch_size: 1 max_total_sequence_length: 2048 - precision: "bfloat16" dtensor_cfg: - enabled: true - cpu_offload: False - sequence_parallel: false - activation_checkpointing: false tensor_parallel_size: 2 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: false - - sequence_packing: - enabled: false - - 
make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - optimizer: - name: "torch.optim.AdamW" kwargs: - lr: 5.0e-6 - weight_decay: 0.1 - betas: [0.9, 0.98] - eps: 1e-8 - foreach: False - fused: False - + eps: 1.0e-08 scheduler: - - name: "torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.000000001 - end_factor: 1.0 - total_iters: 1 - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - milestones: [1] - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} - + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 1.0e-09 + end_factor: 1.0 + total_iters: 1 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 1 logger: - log_dir: "logs" wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: nemo-rl name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - cluster: gpus_per_node: 8 num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.yaml new file mode 100644 index 0000000000..f18407fd59 --- /dev/null +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.yaml @@ -0,0 +1,40 @@ +defaults: ../../dpo.yaml +dpo: + val_period: 50 + val_batches: 16 + val_global_batch_size: 32 + val_at_start: false + sft_loss_weight: 0.01 +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: ${policy.model_name} + train_global_batch_size: 256 + train_micro_batch_size: 1 + max_total_sequence_length: 8192 + dtensor_cfg: + tensor_parallel_size: 4 + optimizer: + kwargs: + eps: 1.0e-08 + scheduler: + - name: 
torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 1.0e-09 + end_factor: 1.0 + total_iters: 1 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 1 +logger: + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.yaml new file mode 100644 index 0000000000..8df4bc3fb0 --- /dev/null +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.yaml @@ -0,0 +1,32 @@ +defaults: ../../dpo.yaml +dpo: + val_period: 50 + val_batches: 16 + val_global_batch_size: 32 + val_at_start: false + sft_loss_weight: 0.01 +checkpointing: + enabled: false +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: ${policy.model_name} + train_global_batch_size: 256 + train_micro_batch_size: 1 + max_total_sequence_length: 8192 + dtensor_cfg: + enabled: false + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + tensor_model_parallel_size: 4 +logger: + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: dpo-llama3.1-8b-instruct-4n8g-megatron-tp4.v2 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml deleted file mode 100644 index 789f4fcbdf..0000000000 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml +++ /dev/null @@ -1,127 +0,0 @@ -dpo: - max_num_epochs: 1 - max_num_steps: 150 - val_period: 50 - val_batches: 16 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: false - seed: 42 - - 
reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 - sft_loss_weight: 0.01 - -checkpointing: - enabled: false #true - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 - save_period: 10000 - -policy: - model_name: "meta-llama/Llama-3.1-8B-Instruct" - tokenizer: - name: ${policy.model_name} - train_global_batch_size: 256 - train_micro_batch_size: 1 - max_total_sequence_length: 2048 - precision: "bfloat16" - dtensor_cfg: - enabled: false - - dynamic_batching: - enabled: false - - sequence_packing: - enabled: false - - make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} - max_grad_norm: 1.0 - - optimizer: null - - megatron_cfg: - enabled: true - empty_unused_memory_level: 1 - activation_checkpointing: false - tensor_model_parallel_size: 2 - expert_tensor_parallel_size: 1 - expert_model_parallel_size: 1 - pipeline_model_parallel_size: 1 - context_parallel_size: 1 - pipeline_dtype: ${policy.precision} - num_layers_in_first_pipeline_stage: null - num_layers_in_last_pipeline_stage: null - sequence_parallel: true - freeze_moe_router: false - moe_router_dtype: "fp64" - moe_router_load_balancing_type: "aux_loss" - moe_router_bias_update_rate: 1e-3 - #gives ~20% training perf speedup with sequence packing - apply_rope_fusion: True - - optimizer: - optimizer: "adam" - lr: 5.0e-6 #4.0e-5 - min_lr: 5.0e-6 #4.0e-5 - weight_decay: 0.1 - bf16: true - fp16: false - params_dtype: "float32" - - #adam - adam_beta1: 0.9 - adam_beta2: 0.98 - adam_eps: 1e-8 - - #sgd - sgd_momentum: 0.9 - - #distributed optimizer - use_distributed_optimizer: true - use_precision_aware_optimizer: true - - clip_grad: ${policy.max_grad_norm} - - scheduler: - start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - weight_decay_incr_style: "constant" 
- lr_decay_style: "linear" - lr_decay_iters: 1000000000 - lr_warmup_iters: 2 - lr_warmup_init: 0.00000001 - - distributed_data_parallel_config: - grad_reduce_in_fp32: false - overlap_grad_reduce: true - overlap_param_gather: true - average_in_collective: true - data_parallel_sharding_strategy: "optim_grads_params" - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} - -logger: - log_dir: "logs" - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml index 7d480f58a3..8b3a43ea28 100644 --- a/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.yaml @@ -1,127 +1,34 @@ +defaults: ../../dpo.yaml dpo: - max_num_epochs: 1 max_num_steps: 20 val_period: 50 val_batches: 16 val_global_batch_size: 32 - val_micro_batch_size: 1 val_at_start: false - seed: 42 - - reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 sft_loss_weight: 0.01 - checkpointing: - enabled: false #true - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 + enabled: false save_period: 10000 - policy: - model_name: "meta-llama/Llama-3.1-8B-Instruct" + model_name: meta-llama/Llama-3.1-8B-Instruct tokenizer: name: ${policy.model_name} train_global_batch_size: 256 train_micro_batch_size: 1 
max_total_sequence_length: 2048 - precision: "bfloat16" dtensor_cfg: enabled: false - - dynamic_batching: - enabled: false - - sequence_packing: - enabled: false - make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} - max_grad_norm: 1.0 - optimizer: null - megatron_cfg: enabled: true - empty_unused_memory_level: 1 - activation_checkpointing: false - tensor_model_parallel_size: 2 - expert_tensor_parallel_size: 1 - expert_model_parallel_size: 1 pipeline_model_parallel_size: 2 - context_parallel_size: 1 - pipeline_dtype: ${policy.precision} - num_layers_in_first_pipeline_stage: null - num_layers_in_last_pipeline_stage: null - sequence_parallel: true - freeze_moe_router: false - moe_router_dtype: "fp64" - moe_router_load_balancing_type: "aux_loss" - moe_router_bias_update_rate: 1e-3 - #gives ~20% training perf speedup with sequence packing - apply_rope_fusion: True - - optimizer: - optimizer: "adam" - lr: 5.0e-6 #4.0e-5 - min_lr: 5.0e-6 #4.0e-5 - weight_decay: 0.1 - bf16: true - fp16: false - params_dtype: "float32" - - #adam - adam_beta1: 0.9 - adam_beta2: 0.98 - adam_eps: 1e-8 - - #sgd - sgd_momentum: 0.9 - - #distributed optimizer - use_distributed_optimizer: true - use_precision_aware_optimizer: true - - clip_grad: ${policy.max_grad_norm} - - scheduler: - start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - weight_decay_incr_style: "constant" - lr_decay_style: "linear" - lr_decay_iters: 1000000000 - lr_warmup_iters: 2 - lr_warmup_init: 0.00000001 - - distributed_data_parallel_config: - grad_reduce_in_fp32: false - overlap_grad_reduce: true - overlap_param_gather: true - average_in_collective: true - data_parallel_sharding_strategy: "optim_grads_params" - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} - logger: - log_dir: "logs" wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - 
monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: nemo-rl - name: dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - + name: dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick cluster: gpus_per_node: 8 num_nodes: 4 diff --git a/examples/configs/recipes/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.yaml new file mode 100644 index 0000000000..3527838c62 --- /dev/null +++ b/examples/configs/recipes/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.yaml @@ -0,0 +1,43 @@ +defaults: ../../dpo.yaml +cluster: + gpus_per_node: 8 +policy: + model_name: allenai/Llama-3.1-Tulu-3-8B-SFT + tokenizer: + name: allenai/Llama-3.1-Tulu-3-8B-SFT + train_micro_batch_size: 1 + max_total_sequence_length: 2048 + optimizer: + kwargs: + lr: 5.0e-07 + weight_decay: 0.0 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 1.0e-06 + end_factor: 1.0 + total_iters: 211 + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 1.0 + end_factor: 0.0 + total_iters: 1899 + - milestones: + - 211 +data: + dataset_name: Tulu3Preference +dpo: + max_num_steps: 2110 + val_period: -1 + val_at_start: false + preference_average_log_probs: true + reference_policy_kl_penalty: 5 + val_micro_batch_size: ${policy.train_micro_batch_size} + val_global_batch_size: ${policy.train_global_batch_size} +checkpointing: + metric_name: null + save_period: 250 +logger: + wandb_enabled: true + wandb: + name: dpo-tulu3-8b diff --git a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml index 8863fad45f..252251fd76 100644 --- a/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml +++ b/examples/configs/recipes/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.yaml @@ 
-1,94 +1,15 @@ +defaults: ../../dpo.yaml dpo: - max_num_epochs: 1 - max_num_steps: 150 - val_period: 25 - val_batches: 8 val_global_batch_size: 32 - val_micro_batch_size: 1 val_at_start: false - seed: 42 - - reference_policy_kl_penalty: 0.05 - preference_average_log_probs: False - sft_average_log_probs: ${.preference_average_log_probs} - preference_loss_weight: 1 - sft_loss_weight: 0 - -checkpointing: - enabled: true - checkpoint_dir: "results/dpo" - metric_name: "val_loss" - higher_is_better: false - keep_top_k: 3 - save_period: 50 - policy: - model_name: "meta-llama/Llama-3.2-1B-Instruct" tokenizer: name: ${policy.model_name} - - train_global_batch_size: 128 - train_micro_batch_size: 2 - max_total_sequence_length: 1024 - precision: "bfloat16" - dtensor_cfg: - enabled: true - cpu_offload: False - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - - dynamic_batching: - enabled: false - - sequence_packing: - enabled: false - - make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} - max_grad_norm: 1.0 - - optimizer: - name: "torch.optim.AdamW" - kwargs: - lr: 5.0e-6 - weight_decay: 0.1 - betas: [0.9, 0.98] - eps: 1e-5 - foreach: False - fused: False - - scheduler: - - name: "torch.optim.lr_scheduler.LinearLR" - kwargs: - start_factor: 0.1 - end_factor: 1.0 - total_iters: 20 - - name: "torch.optim.lr_scheduler.ConstantLR" - kwargs: - factor: 1.0 - total_iters: 10000000000 - - milestones: [20] - -data: - dataset_name: "HelpSteer3" - max_input_seq_length: ${policy.max_total_sequence_length} logger: - log_dir: "logs" wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: nemo-rl name: dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 - cluster: 
gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.yaml.disabled b/examples/configs/recipes/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.yaml.disabled new file mode 100644 index 0000000000..9833aa30d0 --- /dev/null +++ b/examples/configs/recipes/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.yaml.disabled @@ -0,0 +1,46 @@ +defaults: ../../dpo.yaml +dpo: + max_num_steps: 100 + val_period: 10 + val_batches: 1 + val_global_batch_size: 16 + reference_policy_kl_penalty: 0.1 +checkpointing: + checkpoint_dir: results/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long + keep_top_k: null +policy: + model_name: mistralai/Mistral-Nemo-Instruct-2407 + tokenizer: + name: ${policy.model_name} + train_global_batch_size: 8 + train_micro_batch_size: 1 + max_total_sequence_length: 12288 + dtensor_cfg: + activation_checkpointing: true + tensor_parallel_size: 8 + clear_cache_every_n_steps: 1 + env_vars: + PYTORCH_CUDA_ALLOC_CONF: max_split_size_mb:64 + optimizer: + kwargs: + lr: 1.0e-06 + weight_decay: 0.01 + betas: + - 0.9 + - 0.999 + eps: 1.0e-08 + scheduler: + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: [] +data: + shuffle: false +logger: + log_dir: logs/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long + wandb: + project: nemo-rl + name: dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml new file mode 100644 index 0000000000..6e00ecd37c --- /dev/null +++ b/examples/configs/recipes/llm/grpo-dapomath17k-dsv3-megatron.yaml @@ -0,0 +1,52 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + max_val_samples: 10000 +loss_fn: + use_on_policy_kl_approximation: true + use_importance_sampling_correction: true +checkpointing: + 
checkpoint_dir: results/grpo_dapomath17k_dsv3_megatron + keep_top_k: 100 +policy: + model_name: /path/to/dsv3-bf16-checkpoint + train_micro_batch_size: 1 + generation_batch_size: 64 + logprob_batch_size: 1 + max_total_sequence_length: 16384 + dtensor_cfg: + enabled: false + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + activation_checkpointing: true + tensor_model_parallel_size: 8 + expert_model_parallel_size: 32 + pipeline_model_parallel_size: 8 + num_layers_in_first_pipeline_stage: 7 + num_layers_in_last_pipeline_stage: 6 + context_parallel_size: 4 + sequence_parallel: true + moe_permute_fusion: true + apply_rope_fusion: false + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + scheduler: + lr_warmup_init: 5.0e-08 + generation: + vllm_cfg: + async_engine: true + tensor_parallel_size: 32 +data: + prompt_file: null + dataset_name: DAPOMath17K +logger: + monitor_gpus: false + wandb: + name: grpo_dapomath17k_1b_megatron + mlflow: + run_name: grpo_dapomath17k_1b_megatron +cluster: + gpus_per_node: 8 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml new file mode 100644 index 0000000000..ccfa867209 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-16K.yaml @@ -0,0 +1,14 @@ +defaults: + - ../../grpo_math_1B.yaml + - grpo-deepscaler-1.5b-8K.yaml +loss_fn: + reference_policy_kl_penalty: 0.001 + ratio_clip_max: 0.28 +policy: + max_total_sequence_length: 16384 + logprob_batch_size: 2 + dtensor_cfg: + cpu_offload: true + activation_checkpointing: true + tensor_parallel_size: 2 + _v2: false diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml new file mode 100644 index 0000000000..ccdb67197b --- /dev/null +++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-24K.yaml @@ -0,0 +1,23 @@ +defaults: 
+ - ../../grpo_math_1B.yaml + - grpo-deepscaler-1.5b-8K.yaml +loss_fn: + reference_policy_kl_penalty: 0.0001 + ratio_clip_max: 0.28 +policy: + max_total_sequence_length: 24576 + logprob_batch_size: 2 + dtensor_cfg: + cpu_offload: true + activation_checkpointing: true + tensor_parallel_size: 2 + _v2: false + sequence_packing: + enabled: false + optimizer: + kwargs: + lr: 5.0e-07 + generation: + vllm_cfg: + gpu_memory_utilization: 0.8 + enforce_eager: true diff --git a/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml new file mode 100644 index 0000000000..584b807663 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-deepscaler-1.5b-8K.yaml @@ -0,0 +1,38 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 128 + num_generations_per_prompt: 8 + max_val_samples: 480 + val_batch_size: 32 +loss_fn: + reference_policy_kl_penalty: 0.0 +checkpointing: + keep_top_k: 10 + model_save_format: null +policy: + model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B + train_global_batch_size: 64 + train_micro_batch_size: 1 + max_total_sequence_length: 8192 + dtensor_cfg: + cpu_offload: true + activation_checkpointing: true + _v2: false + sequence_packing: + enabled: false + optimizer: + kwargs: + lr: 2.0e-06 + generation: + vllm_kwargs: + compilation_config: + use_inductor: false +data: + dataset_name: DeepScaler +env: + math: + num_workers: 16 +logger: + monitor_gpus: false +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml index cd63c3ba79..15ca65c8f9 100644 --- a/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml +++ b/examples/configs/recipes/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.yaml @@ -1,122 +1,29 @@ +defaults: ../../grpo_math_1B.yaml grpo: - num_prompts_per_step: 32 - num_generations_per_prompt: 16 - max_rollout_turns: 1 max_num_steps: 500 - 
normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true checkpointing: - enabled: true checkpoint_dir: results/grpo-gemma3-1b-it-1n8g-fsdp2tp1 - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 policy: model_name: google/gemma-3-1b-it tokenizer: name: google/gemma-3-1b-it - train_global_batch_size: 512 - train_micro_batch_size: 4 - generation_batch_size: 32 - logprob_batch_size: 4 - max_total_sequence_length: 512 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 + enabled: true sequence_packing: enabled: false make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 50 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 50 generation: - backend: vllm max_new_tokens: 512 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: null - stop_strings: null vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - 
gpu_memory_utilization: 0.6 max_model_len: 512 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null data: max_input_seq_length: 512 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 logger: log_dir: logs/grpo-gemma3-1b-it-1n8g-fsdp2tp1 - num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true wandb: project: nemo-rl name: grpo-gemma3-1b-it-1n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 cluster: gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml deleted file mode 100644 index bdd0361cf8..0000000000 --- a/examples/configs/recipes/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.yaml +++ /dev/null @@ -1,123 +0,0 @@ -grpo: - num_prompts_per_step: 64 - num_generations_per_prompt: 32 - max_rollout_turns: 1 - max_num_steps: 20 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true -checkpointing: - enabled: true - checkpoint_dir: results/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 -policy: - model_name: google/gemma-3-27b-it - tokenizer: - name: google/gemma-3-27b-it - train_global_batch_size: 512 - train_micro_batch_size: 1 - generation_batch_size: 32 - logprob_batch_size: 2 - max_total_sequence_length: 16384 - precision: bfloat16 - dtensor_cfg: - enabled: true - 
cpu_offload: false - sequence_parallel: true - activation_checkpointing: true - tensor_parallel_size: 8 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - # TODO: OOMs if enabled https://github.com/NVIDIA-NeMo/RL/issues/383 - enabled: False - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 8 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 50 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 50 - generation: - backend: vllm - max_new_tokens: 16384 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: null - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 4 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: 16384 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null -data: - max_input_seq_length: 16384 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 -logger: - log_dir: logs/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long - num_val_samples_to_print: 0 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - wandb: - project: nemo-rl - name: grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 16 diff --git 
a/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.yaml b/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.yaml new file mode 100644 index 0000000000..ebf15c6e58 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.yaml @@ -0,0 +1,42 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 20 +checkpointing: + checkpoint_dir: results/grpo-gemma3-27b-it-8n8g-fsdp2tp8sp-actckpt-long + model_save_format: null +policy: + model_name: google/gemma-3-27b-it + tokenizer: + name: google/gemma-3-27b-it + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 16384 + dtensor_cfg: + activation_checkpointing: true + tensor_parallel_size: 8 + _v2: false + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 8 + optimizer: + kwargs: + lr: 3.0e-07 + generation: + max_new_tokens: 16384 + vllm_cfg: + tensor_parallel_size: 4 + max_model_len: 16384 +data: + max_input_seq_length: 16384 +logger: + log_dir: logs/grpo-gemma3-27b-it-8n8g-fsdp2tp8sp-actckpt-long + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-gemma3-27b-it-8n8g-fsdp2tp8sp-actckpt-long +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml b/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml new file mode 100644 index 0000000000..d5525fc027 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-gspo-deepscaler-1.5b-8K.yaml @@ -0,0 +1,40 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 128 + num_generations_per_prompt: 8 + max_val_samples: 480 + val_batch_size: 32 +loss_fn: + reference_policy_kl_penalty: 0.0 + sequence_level_importance_ratios: true + token_level_loss: false +checkpointing: + keep_top_k: 10 + model_save_format: null +policy: + model_name: 
deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B + train_global_batch_size: 64 + train_micro_batch_size: 1 + max_total_sequence_length: 8192 + dtensor_cfg: + cpu_offload: true + sequence_parallel: true + activation_checkpointing: true + _v2: false + sequence_packing: + enabled: false + optimizer: + kwargs: + lr: 2.0e-06 + generation: + vllm_cfg: + enforce_eager: true +data: + dataset_name: DeepScaler +env: + math: + num_workers: 16 +logger: + monitor_gpus: false +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.yaml new file mode 100644 index 0000000000..4ed7b47d06 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.yaml @@ -0,0 +1,58 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 500 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + converter_type: LlamaForCausalLM + pipeline_model_parallel_size: 2 + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: false + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: true + fp8: e4m3 + fp8_recipe: blockwise + fp8_param: false + env_vars: + NVTE_FP8_BLOCK_SCALING_FP32_SCALES: '1' + generation: + max_new_tokens: 4096 + vllm_cfg: + precision: fp8 + gpu_memory_utilization: 0.5 + max_model_len: 4096 + use_deep_gemm: true +data: + 
max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.yaml new file mode 100644 index 0000000000..dcd791eee6 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.yaml @@ -0,0 +1,48 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_generations_per_prompt: 32 + max_num_steps: 500 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + converter_type: LlamaForCausalLM + pipeline_model_parallel_size: 2 + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + generation: + max_new_tokens: 4096 + vllm_cfg: + precision: fp8 + max_model_len: 4096 + use_deep_gemm: true +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v2 + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v2 +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml 
b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml new file mode 100644 index 0000000000..d9161477af --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.yaml @@ -0,0 +1,56 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 500 +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 1 + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + max_new_tokens: 4096 + vllm_cfg: + async_engine: true + max_model_len: 4096 + colocated: + enabled: false + resources: + gpus_per_node: 8 + num_nodes: 1 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated +cluster: + gpus_per_node: 8 + num_nodes: 2 diff --git a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml index ae1eee46c8..cc8983a465 100644 --- a/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.yaml @@ -1,123 +1,50 @@ +defaults: ../../grpo_math_1B.yaml grpo: 
num_prompts_per_step: 64 num_generations_per_prompt: 32 - max_rollout_turns: 1 max_num_steps: 500 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true checkpointing: - enabled: true checkpoint_dir: results/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 policy: model_name: meta-llama/Llama-3.1-8B-Instruct tokenizer: name: meta-llama/Llama-3.1-8B-Instruct - train_global_batch_size: 512 train_micro_batch_size: 1 - generation_batch_size: 32 logprob_batch_size: 2 max_total_sequence_length: 4096 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 + enabled: true sequence_packing: enabled: false make_sequence_length_divisible_by: 1 - max_grad_norm: 1 optimizer: - name: torch.optim.AdamW kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false + lr: 3.0e-07 scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 13 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 13 + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: 
torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 generation: - backend: vllm max_new_tokens: 4096 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 128009 - stop_strings: null vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 max_model_len: 4096 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null data: max_input_seq_length: 4096 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 logger: log_dir: logs/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long - num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true wandb: project: nemo-rl name: grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 cluster: gpus_per_node: 8 num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml index 4571128a9a..6eb8ed4872 100644 --- a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -1,123 +1,29 @@ +defaults: ../../grpo_math_1B.yaml grpo: - num_prompts_per_step: 32 - num_generations_per_prompt: 16 - max_rollout_turns: 1 max_num_steps: 500 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true checkpointing: - enabled: 
true checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 policy: model_name: meta-llama/Llama-3.2-1B-Instruct tokenizer: name: meta-llama/Llama-3.2-1B-Instruct - train_global_batch_size: 512 - train_micro_batch_size: 4 - generation_batch_size: 32 - logprob_batch_size: 4 - max_total_sequence_length: 512 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 + enabled: true sequence_packing: enabled: false make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 50 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 50 generation: - backend: vllm max_new_tokens: 512 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 128009 - stop_strings: null vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 max_model_len: 512 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null data: max_input_seq_length: 512 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 logger: log_dir: 
logs/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 - num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true wandb: project: nemo-rl name: grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 cluster: gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.yaml new file mode 100755 index 0000000000..333a06d980 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.yaml @@ -0,0 +1,34 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + max_num_steps: 500 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-llama3.2-1b-instruct-1n8g-megatron + save_period: 100 +policy: + model_name: meta-llama/Llama-3.2-1B-Instruct + tokenizer: + name: meta-llama/Llama-3.2-1B-Instruct + optimizer: null + megatron_cfg: + enabled: true + scheduler: + lr_warmup_iters: 50 + dtensor_cfg: + enabled: false + make_sequence_length_divisible_by: 1 + generation: + max_new_tokens: 512 + vllm_cfg: + max_model_len: 512 +data: + max_input_seq_length: 512 +logger: + log_dir: logs/grpo-llama3.2-1b-instruct-1n8g-megatron + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.2-1b-instruct-1n8g-megatron +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.yaml.disabled b/examples/configs/recipes/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.yaml.disabled new file mode 100644 index 0000000000..574db88263 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.yaml.disabled @@ -0,0 +1,47 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 128 +policy: + model_name: nvidia/Llama-3_3-Nemotron-Super-49B-v1_5 + tokenizer: + name: 
nvidia/Llama-3_3-Nemotron-Super-49B-v1_5 + max_total_sequence_length: 1024 + train_global_batch_size: 128 + dtensor_cfg: + activation_checkpointing: true + tensor_parallel_size: 8 + custom_parallel_plan: examples.configs.recipes.llm.llama_nemotron_super_49b_custom_plan.custom_parallel_plan + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 13 + generation: + vllm_cfg: + tensor_parallel_size: 4 +logger: + wandb_enabled: true + monitor_gpus: false + wandb: + project: grpo-nemotron-super-49b + name: grpo-${data.dataset_name}-nemotron-super-49b-tp${policy.dtensor_cfg.tensor_parallel_size} + mlflow: + experiment_name: sft-dev + run_name: grpo-nemotron-super-49b +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.yaml b/examples/configs/recipes/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.yaml new file mode 100644 index 0000000000..92fb87c196 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.yaml @@ -0,0 +1,52 @@ +defaults: ../../grpo_math_1B.yaml +checkpointing: + checkpoint_dir: results/grpo-math-qwen3-30ba3b-megatron-tp4-32k + save_period: 3 + keep_top_k: 1 +grpo: + max_num_steps: 3 + num_prompts_per_step: 64 + val_period: 3 +policy: + model_name: Qwen/Qwen3-30B-A3B + train_micro_batch_size: 1 + logprob_batch_size: 1 + max_total_sequence_length: 32768 + logprob_chunk_size: 2048 + dtensor_cfg: + enabled: false + sequence_packing: + enabled: false + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + scheduler: null + megatron_cfg: + enabled: true + converter_type: LlamaForCausalLM + tensor_model_parallel_size: 4 + 
expert_model_parallel_size: 8 + sequence_parallel: true + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + generation: + vllm_cfg: + tensor_parallel_size: 4 + enforce_eager: true +logger: + log_dir: logs/grpo-math-qwen3-30ba3b-megatron-tp4-32k + wandb_enabled: true + tensorboard_enabled: true + monitor_gpus: false + wandb: + project: nemo-rl + name: grpo-math-qwen3-30ba3b-megatron-tp4-32k +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml new file mode 100644 index 0000000000..e1e38fbbfc --- /dev/null +++ b/examples/configs/recipes/llm/grpo-moonlight-16ba3b-4n8g-megatron.yaml @@ -0,0 +1,40 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + val_period: -1 +loss_fn: + reference_policy_kl_penalty: 0.04 +checkpointing: + enabled: false + checkpoint_dir: results/grpo_megatron + save_period: 10000 +policy: + model_name: moonshotai/Moonlight-16B-A3B-Instruct + train_micro_batch_size: 1 + generation_batch_size: 64 + logprob_batch_size: 1 + max_total_sequence_length: 8192 + dtensor_cfg: + enabled: false + sequence_packing: + enabled: false + algorithm: modified_ffd + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + expert_model_parallel_size: 4 + pipeline_model_parallel_size: 4 + num_layers_in_first_pipeline_stage: 7 + num_layers_in_last_pipeline_stage: 6 + apply_rope_fusion: false + optimizer: + lr: 1.0e-06 + scheduler: + lr_warmup_iters: 50 +logger: + monitor_gpus: false + wandb: + name: grpo-moonlight-16B-A3B-Instruct +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-nano-v2-12b-1n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-nano-v2-12b-1n8g-megatron.yaml new 
file mode 100644 index 0000000000..86690abcc2 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-nano-v2-12b-1n8g-megatron.yaml @@ -0,0 +1,34 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + max_num_steps: 30 +checkpointing: + checkpoint_dir: results/grpo-nano-v2-12b-1n8g-megatron +policy: + model_name: nvidia/NVIDIA-Nemotron-Nano-12B-v2 + tokenizer: + name: nvidia/NVIDIA-Nemotron-Nano-12B-v2 + optimizer: null + megatron_cfg: + enabled: true + bias_activation_fusion: false + tensor_model_parallel_size: 8 + dtensor_cfg: + enabled: false + make_sequence_length_divisible_by: 1 + generation: + max_new_tokens: 512 + vllm_cfg: + max_model_len: 512 + sequence_packing: + enabled: false +data: + max_input_seq_length: 512 +logger: + log_dir: logs/grpo-nano-v2-12b-1n8g-megatron + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-nano-v2-12b-1n8g-megatron +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.yaml b/examples/configs/recipes/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.yaml new file mode 100644 index 0000000000..7f77edbb43 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.yaml @@ -0,0 +1,44 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + max_num_steps: 30 +checkpointing: + checkpoint_dir: results/grpo-nano-v2-12b-2n8g-fsdp2tp1 +policy: + model_name: nvidia/NVIDIA-Nemotron-Nano-12B-v2 + tokenizer: + name: nvidia/NVIDIA-Nemotron-Nano-12B-v2 + dtensor_cfg: + cpu_offload: true + activation_checkpointing: true + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 1 + generation: + max_new_tokens: 512 + vllm_cfg: + max_model_len: 512 + scheduler: + - name: "torch.optim.lr_scheduler.LinearLR" + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 13 + - name: "torch.optim.lr_scheduler.ConstantLR" + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: [13] +data: + max_input_seq_length: 512 
+logger: + log_dir: logs/grpo-nano-v2-12b-2n8g-fsdp2tp1 + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-nano-v2-12b-2n8g-fsdp2tp1 +cluster: + gpus_per_node: 8 + num_nodes: 2 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.yaml new file mode 100644 index 0000000000..d8f6fd380b --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.yaml @@ -0,0 +1,54 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 20 +checkpointing: + checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 16384 + dtensor_cfg: + activation_checkpointing: true + tensor_parallel_size: 8 + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 8 + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + max_new_tokens: 16384 + vllm_cfg: + tensor_parallel_size: 4 + max_model_len: 16384 +data: + max_input_seq_length: 16384 +logger: + log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long +cluster: + gpus_per_node: 8 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.yaml new file mode 100644 index 
0000000000..665068a4eb --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.yaml @@ -0,0 +1,54 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 2 +checkpointing: + checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 16384 + dtensor_cfg: + activation_checkpointing: true + tensor_parallel_size: 8 + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 8 + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + max_new_tokens: 16384 + vllm_cfg: + tensor_parallel_size: 4 + max_model_len: 16384 +data: + max_input_seq_length: 16384 +logger: + log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt +cluster: + gpus_per_node: 8 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml deleted file mode 100644 index 33ecfae6a4..0000000000 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.yaml +++ /dev/null @@ -1,123 +0,0 @@ -grpo: - num_prompts_per_step: 64 - num_generations_per_prompt: 32 - max_rollout_turns: 1 - max_num_steps: 20 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 
0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true -checkpointing: - enabled: true - checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 -policy: - model_name: Qwen/Qwen2.5-32B - tokenizer: - name: Qwen/Qwen2.5-32B - train_global_batch_size: 512 - train_micro_batch_size: 1 - generation_batch_size: 32 - logprob_batch_size: 2 - max_total_sequence_length: 16384 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: true - activation_checkpointing: true - tensor_parallel_size: 8 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 8 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 13 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 13 - generation: - backend: vllm - max_new_tokens: 16384 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 151643 - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 4 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: 16384 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null -data: - 
max_input_seq_length: 16384 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 -logger: - log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long - num_val_samples_to_print: 0 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - wandb: - project: nemo-rl - name: grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml deleted file mode 100644 index ea862ee9d3..0000000000 --- a/examples/configs/recipes/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.yaml +++ /dev/null @@ -1,123 +0,0 @@ -grpo: - num_prompts_per_step: 64 - num_generations_per_prompt: 32 - max_rollout_turns: 1 - max_num_steps: 2 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true -checkpointing: - enabled: true - checkpoint_dir: results/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 -policy: - model_name: Qwen/Qwen2.5-32B - tokenizer: - name: Qwen/Qwen2.5-32B - train_global_batch_size: 512 - train_micro_batch_size: 1 - generation_batch_size: 32 - logprob_batch_size: 2 - max_total_sequence_length: 16384 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: true - activation_checkpointing: true - tensor_parallel_size: 8 - context_parallel_size: 1 - 
custom_parallel_plan: null - dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 8 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 13 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 13 - generation: - backend: vllm - max_new_tokens: 16384 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 151643 - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 4 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: 16384 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null -data: - max_input_seq_length: 16384 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 -logger: - log_dir: logs/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt - num_val_samples_to_print: 0 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - wandb: - project: nemo-rl - name: grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 32 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.yaml new file mode 100644 index 0000000000..1702df0b94 --- /dev/null +++ 
b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.yaml @@ -0,0 +1,53 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 30 +checkpointing: + checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4 +policy: + model_name: Qwen/Qwen2.5-7B-Instruct + tokenizer: + name: Qwen/Qwen2.5-7B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + tensor_parallel_size: 4 + dynamic_batching: + enabled: true + sequence_packing: + enabled: false + make_sequence_length_divisible_by: 4 + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + max_new_tokens: 4096 + vllm_cfg: + tensor_parallel_size: 4 + max_model_len: 4096 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4 + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml deleted file mode 100644 index 0961b8f2c7..0000000000 --- a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.yaml +++ /dev/null @@ -1,123 +0,0 @@ -grpo: - num_prompts_per_step: 64 - num_generations_per_prompt: 32 - max_rollout_turns: 1 - max_num_steps: 30 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - 
use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true -checkpointing: - enabled: true - checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 -policy: - model_name: Qwen/Qwen2.5-7B-Instruct - tokenizer: - name: Qwen/Qwen2.5-7B-Instruct - train_global_batch_size: 512 - train_micro_batch_size: 1 - generation_batch_size: 32 - logprob_batch_size: 2 - max_total_sequence_length: 4096 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: true - activation_checkpointing: false - tensor_parallel_size: 4 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 4 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 3e-07 - weight_decay: 0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 13 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 13 - generation: - backend: vllm - max_new_tokens: 4096 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 151645 - stop_strings: null - vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 4 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 - max_model_len: 4096 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null -data: - max_input_seq_length: 4096 - prompt_file: examples/prompts/cot.txt - 
system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 -logger: - log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp - num_val_samples_to_print: 0 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - wandb: - project: nemo-rl - name: grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.yaml new file mode 100755 index 0000000000..fd0a48a663 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.yaml @@ -0,0 +1,54 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 30 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen2.5-7b-instruct-4n8g-megatron + save_period: 100 +policy: + model_name: Qwen/Qwen2.5-7B-Instruct + train_micro_batch_size: 1 + logprob_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + tensor_model_parallel_size: 2 + scheduler: + lr_warmup_iters: 50 + make_sequence_length_divisible_by: 4 + optimizer: + kwargs: + lr: 3.0e-07 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1 + total_iters: 13 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1 + total_iters: 10000000000 + - milestones: + - 13 + generation: + max_new_tokens: 4096 + vllm_cfg: + tensor_parallel_size: 4 + max_model_len: 4096 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-qwen2.5-7b-instruct-4n8g-megatron + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen2.5-7b-instruct-4n8g-megatron +cluster: + gpus_per_node: 8 + num_nodes: 4 diff 
--git a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml index 978832bad0..bb62bf99ef 100644 --- a/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml +++ b/examples/configs/recipes/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.yaml @@ -1,123 +1,29 @@ +defaults: ../../grpo_math_1B.yaml grpo: - num_prompts_per_step: 32 - num_generations_per_prompt: 16 - max_rollout_turns: 1 max_num_steps: 450 - normalize_rewards: true - use_leave_one_out_baseline: true - val_period: 10 - val_at_start: false - max_val_samples: 256 - val_batch_size: 256 -loss_fn: - reference_policy_kl_penalty: 0.01 - ratio_clip_min: 0.2 - ratio_clip_max: 0.2 - ratio_clip_c: null - use_on_policy_kl_approximation: false - use_importance_sampling_correction: false - token_level_loss: true checkpointing: - enabled: true checkpoint_dir: results/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 - metric_name: val_reward - higher_is_better: true - keep_top_k: 3 - save_period: 10 policy: model_name: Qwen/Qwen2.5-Math-1.5B-Instruct tokenizer: name: Qwen/Qwen2.5-Math-1.5B-Instruct - train_global_batch_size: 512 - train_micro_batch_size: 4 - generation_batch_size: 32 - logprob_batch_size: 4 - max_total_sequence_length: 512 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null dynamic_batching: - enabled: True - train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} - logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} - sequence_length_round: 64 + enabled: true sequence_packing: enabled: false make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 
0.01 - betas: - - 0.9 - - 0.999 - eps: 1e-08 - foreach: false - fused: false - scheduler: - - name: torch.optim.lr_scheduler.LinearLR - kwargs: - start_factor: 0.1 - end_factor: 1 - total_iters: 50 - - name: torch.optim.lr_scheduler.ConstantLR - kwargs: - factor: 1 - total_iters: 10000000000 - - milestones: - - 50 generation: - backend: vllm max_new_tokens: 512 - temperature: 1 - top_p: 1 - top_k: null - stop_token_ids: - - 151645 - stop_strings: null vllm_cfg: - async_engine: false - precision: ${policy.precision} - tensor_parallel_size: 1 - pipeline_parallel_size: 1 - gpu_memory_utilization: 0.6 max_model_len: 512 - enforce_eager: False - colocated: - enabled: true - resources: - gpus_per_node: null - num_nodes: null data: max_input_seq_length: 512 - prompt_file: examples/prompts/cot.txt - system_prompt_file: null - dataset_name: OpenMathInstruct-2 -env: - math: - num_workers: 8 logger: log_dir: logs/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 - num_val_samples_to_print: 0 wandb_enabled: true tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true wandb: project: nemo-rl name: grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1 - tensorboard: {} - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 cluster: gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n8g-megatron.yaml b/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n8g-megatron.yaml new file mode 100755 index 0000000000..6e0aa5cd81 --- /dev/null +++ b/examples/configs/recipes/llm/grpo-qwen3-30ba3b-8n8g-megatron.yaml @@ -0,0 +1,47 @@ +defaults: ../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-30ba3b-8n8g-megatron +policy: + model_name: Qwen/Qwen3-30B-A3B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + sequence_packing: + enabled: false + algorithm: 
modified_ffd + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 4 + expert_model_parallel_size: 4 + sequence_parallel: true + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 50 + lr_warmup_init: 3.0e-08 + env_vars: + PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False + generation: + vllm_cfg: + tensor_parallel_size: 4 + gpu_memory_utilization: 0.7 +logger: + log_dir: logs/grpo-qwen3-30ba3b-8n8g-megatron + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-30ba3b-8n8g-megatron +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/examples/configs/recipes/llm/llama_nemotron_super_49b_custom_plan.py b/examples/configs/recipes/llm/llama_nemotron_super_49b_custom_plan.py new file mode 100644 index 0000000000..a0381adf9c --- /dev/null +++ b/examples/configs/recipes/llm/llama_nemotron_super_49b_custom_plan.py @@ -0,0 +1,49 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from torch.distributed.tensor.parallel import ( + ColwiseParallel, + ParallelStyle, + PrepareModuleInput, + PrepareModuleOutput, + RowwiseParallel, +) +from torch.distributed.tensor.placement_types import Replicate, Shard + +custom_parallel_plan: dict[str, ParallelStyle] = { + "model.layers.*.self_attn": PrepareModuleInput( + input_kwarg_layouts={"attention_mask": Replicate()}, + desired_input_kwarg_layouts={"attention_mask": Replicate()}, + ), + "model.embed_tokens": RowwiseParallel( + input_layouts=Replicate(), output_layouts=Replicate(), use_local_output=True + ), + "model.layers.*.self_attn.q_proj": ColwiseParallel(use_local_output=False), + "model.layers.*.self_attn.k_proj": ColwiseParallel(use_local_output=False), + "model.layers.*.self_attn.v_proj": ColwiseParallel(use_local_output=False), + "model.layers.*.self_attn.o_proj": RowwiseParallel( + output_layouts=Replicate(), use_local_output=True + ), + "model.layers.*.self_attn.rotary_emb": PrepareModuleOutput( + output_layouts=(Replicate(), Replicate()), + desired_output_layouts=(Replicate(), Replicate()), + use_local_output=False, + ), + "model.layers.*.mlp.up_proj": ColwiseParallel(), + "model.layers.*.mlp.gate_proj": ColwiseParallel(), + "model.layers.*.mlp.down_proj": RowwiseParallel( + output_layouts=Replicate(), use_local_output=True + ), + "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), +} diff --git a/examples/configs/recipes/llm/performance/grpo-deepseek-v3-32n8g.yaml b/examples/configs/recipes/llm/performance/grpo-deepseek-v3-32n8g.yaml new file mode 100644 index 0000000000..75457ab802 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-deepseek-v3-32n8g.yaml @@ -0,0 +1,57 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 32 + num_generations_per_prompt: 16 + max_num_steps: 500 + val_batch_size: 5 + max_val_samples: 16 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: 
results/grpo-deepseek-v3-32n8g +policy: + model_name: unsloth/DeepSeek-V3-0324-BF16 + tokenizer: + name: unsloth/DeepSeek-V3-0324-BF16 + train_micro_batch_size: 1 + logprob_batch_size: 1 + max_total_sequence_length: 1536 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + converter_type: LlamaForCausalLM + pipeline_model_parallel_size: 16 + expert_model_parallel_size: 16 + activation_checkpointing: true + num_layers_in_first_pipeline_stage: 3 + num_layers_in_last_pipeline_stage: 2 + apply_rope_fusion: false + moe_permute_fusion: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: true + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: false + generation: + vllm_cfg: + tensor_parallel_size: 32 + async_engine: true +logger: + log_dir: logs/grpo-deepseek-v3-32n8g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-deepseek-v3-32n8g +cluster: + gpus_per_node: 8 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/performance/grpo-deepseek-v3-64n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-deepseek-v3-64n8g-async-1off.yaml new file mode 100644 index 0000000000..595654a3a3 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-deepseek-v3-64n8g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-deepseek-v3-32n8g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-deepseek-v3-64n8g-async-1off +policy: + logprob_batch_size: 2 + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 16 + expert_model_parallel_size: 16 + generation: + colocated: + enabled: false + resources: + num_nodes: 32 + gpus_per_node: 8 + vllm_cfg: + 
tensor_parallel_size: 32 + gpu_memory_utilization: 0.8 + async_engine: true +logger: + log_dir: logs/grpo-deepseek-v3-64n8g-async-32T32G-1off + wandb: + name: grpo-deepseek-v3-64n8g-async-32T32G-1off +cluster: + gpus_per_node: 8 + num_nodes: 64 diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.yaml new file mode 100644 index 0000000000..b6d7ed441d --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.yaml @@ -0,0 +1,27 @@ +defaults: ./grpo-llama3.1-8b-instruct-2n8g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g-async-1off +policy: + generation: + colocated: + enabled: false + resources: + num_nodes: 1 + gpus_per_node: 8 + vllm_cfg: + async_engine: true + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n8g-1T1G-async-1off + wandb: + name: grpo-llama3.1-8b-instruct-2n8g-1T1G-async-1off +cluster: + gpus_per_node: 8 + num_nodes: 2 \ No newline at end of file diff --git a/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g.yaml b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g.yaml new file mode 100644 index 0000000000..afdbf8c414 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-llama3.1-8b-instruct-2n8g.yaml @@ -0,0 +1,54 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 + max_num_steps: 500 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-llama3.1-8b-instruct-2n8g +policy: + model_name: meta-llama/Llama-3.1-8B-Instruct + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_micro_batch_size: 1 + 
logprob_batch_size: 2 + max_total_sequence_length: 4096 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + converter_type: LlamaForCausalLM + pipeline_model_parallel_size: 2 + activation_checkpointing: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: true + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: false + generation: + max_new_tokens: 4096 + stop_token_ids: + - 128009 + vllm_cfg: + max_model_len: 4096 +data: + max_input_seq_length: 4096 +logger: + log_dir: logs/grpo-llama3.1-8b-instruct-2n8g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-llama3.1-8b-instruct-2n8g +cluster: + gpus_per_node: 8 + num_nodes: 2 \ No newline at end of file diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-235b-16n8g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-235b-16n8g.yaml new file mode 100644 index 0000000000..1376c8d340 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-235b-16n8g.yaml @@ -0,0 +1,59 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 16 + num_generations_per_prompt: 32 + max_num_steps: 500 + val_batch_size: 5 + max_val_samples: 16 +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-235b-16n8g +policy: + model_name: Qwen/Qwen3-235B-A22B + tokenizer: + name: Qwen/Qwen3-235B-A22B + train_micro_batch_size: 1 + logprob_batch_size: 1 + max_total_sequence_length: 8192 + make_sequence_length_divisible_by: 1 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + converter_type: LlamaForCausalLM + tensor_model_parallel_size: 2 + sequence_parallel: true + pipeline_model_parallel_size: 8 + context_parallel_size: 2 + expert_model_parallel_size: 16 + 
activation_checkpointing: true + num_layers_in_first_pipeline_stage: 11 + num_layers_in_last_pipeline_stage: 11 + moe_permute_fusion: true + defer_fp32_logits: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + weight_decay: 0.0 + use_precision_aware_optimizer: true + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 5.0e-08 + fp8_cfg: + enabled: false + generation: + vllm_cfg: + tensor_parallel_size: 16 + async_engine: true +logger: + log_dir: logs/grpo-qwen3-235b-16n8g + wandb_enabled: true + tensorboard_enabled: false # to avoid a bug + wandb: + project: nemo-rl + name: grpo-qwen3-235b-16n8g +cluster: + gpus_per_node: 8 + num_nodes: 16 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-235b-32n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-235b-32n8g-async-1off.yaml new file mode 100644 index 0000000000..cf4f5a6f98 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-235b-32n8g-async-1off.yaml @@ -0,0 +1,35 @@ +defaults: ./grpo-qwen3-235b-16n8g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-235b-32n8g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 4 + sequence_parallel: true + context_parallel_size: 1 + pipeline_model_parallel_size: 8 + expert_model_parallel_size: 16 + defer_fp32_logits: false + generation: + colocated: + enabled: false + resources: + num_nodes: 16 + gpus_per_node: 8 + vllm_cfg: + tensor_parallel_size: 8 + gpu_memory_utilization: 0.8 + async_engine: true +logger: + log_dir: logs/grpo-qwen3-235b-32n8g-16T16G-async-1off + wandb: + name: grpo-qwen3-235b-32n8g-16T16G-async-1off +cluster: + gpus_per_node: 8 + num_nodes: 32 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml new file mode 
100644 index 0000000000..4cc5981460 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.yaml @@ -0,0 +1,33 @@ +defaults: ./grpo-qwen3-30ba3b-4n8g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 2 + expert_model_parallel_size: 8 + sequence_parallel: false + generation: + colocated: + enabled: false + resources: + num_nodes: 2 + gpus_per_node: 8 + vllm_cfg: + async_engine: true + tensor_parallel_size: 2 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-30ba3b-4n8g-2T2G-async-1off + wandb: + name: grpo-qwen3-30ba3b-4n8g-2T2G-async-1off +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml new file mode 100644 index 0000000000..053c703eee --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-30ba3b-4n8g.yaml @@ -0,0 +1,44 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-30ba3b-4n8g +policy: + model_name: Qwen/Qwen3-30B-A3B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + expert_model_parallel_size: 8 + sequence_parallel: false + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 50 + lr_warmup_init: 3.0e-08 + env_vars: + 
PYTORCH_CUDA_ALLOC_CONF: expandable_segments:False + generation: + vllm_cfg: + tensor_parallel_size: 4 +logger: + log_dir: logs/grpo-qwen3-30ba3b-4n8g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-30ba3b-4n8g +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n8g.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n8g.yaml new file mode 100644 index 0000000000..ad780ebc50 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-4n8g.yaml @@ -0,0 +1,41 @@ +defaults: ../../../grpo_math_1B.yaml +grpo: + num_prompts_per_step: 64 + num_generations_per_prompt: 32 +checkpointing: + enabled: false + checkpoint_dir: results/grpo-qwen3-32b-4n8g +policy: + model_name: Qwen/Qwen3-32B + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + optimizer: null + scheduler: null + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 4 + sequence_parallel: true + optimizer: + lr: 3.0e-07 + min_lr: 3.0e-08 + scheduler: + lr_warmup_iters: 2 + lr_warmup_init: 3.0e-08 + generation: + vllm_cfg: + tensor_parallel_size: 4 +logger: + log_dir: logs/grpo-qwen3-32b-4n8g + wandb_enabled: true + tensorboard_enabled: true + wandb: + project: nemo-rl + name: grpo-qwen3-32b-4n8g +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n8g-async-1off.yaml b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n8g-async-1off.yaml new file mode 100644 index 0000000000..9f20f34f40 --- /dev/null +++ b/examples/configs/recipes/llm/performance/grpo-qwen3-32b-8n8g-async-1off.yaml @@ -0,0 +1,32 @@ +defaults: ./grpo-qwen3-32b-4n8g.yaml +grpo: + async_grpo: + enabled: true + max_trajectory_age_steps: 1 + 
in_flight_weight_updates: true +loss_fn: + use_importance_sampling_correction: true +checkpointing: + checkpoint_dir: results/grpo-qwen3-32b-8n8g-async-1off +policy: + megatron_cfg: + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 4 + sequence_parallel: true + generation: + colocated: + enabled: false + resources: + num_nodes: 4 + gpus_per_node: 8 + vllm_cfg: + async_engine: true + tensor_parallel_size: 4 + gpu_memory_utilization: 0.8 +logger: + log_dir: logs/grpo-qwen3-32b-8n8g-4T4G-async-1off + wandb: + name: grpo-qwen3-32b-8n8g-4T4G-async-1off +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml new file mode 100644 index 0000000000..37e3bff33c --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.yaml @@ -0,0 +1,61 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 1000000 + val_period: 500 + val_batches: 4 + val_global_batch_size: 128 + val_at_start: false +checkpointing: + checkpoint_dir: results/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron + save_period: 100 +policy: + model_name: meta-llama/Llama-3.1-70B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + tensor_model_parallel_size: 4 + pipeline_model_parallel_size: 2 + freeze_moe_router: true + moe_router_dtype: fp64 + moe_router_load_balancing_type: none + moe_router_bias_update_rate: 0.0 + optimizer: + lr: 2.0e-05 + min_lr: 2.0e-05 + weight_decay: 0.01 + bf16: true + adam_beta2: 0.999 + adam_eps: 1.0e-08 + clip_grad: 0.0 + scheduler: + lr_warmup_iters: 1 + lr_warmup_init: 2.0e-05 + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + max_grad_norm: null + optimizer: + kwargs: + lr: 2.0e-05 + weight_decay: 
0.01 + eps: 1.0e-08 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + monitor_gpus: false + wandb: + name: openmathinstruct-nemorl-1M_train + tensorboard: + log_dir: tb_logs-openmathinstruct-nemorl-1M_train +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml new file mode 100644 index 0000000000..88d446283d --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.yaml @@ -0,0 +1,45 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 10000 + val_period: 500 + val_batches: 4 + val_global_batch_size: 128 + val_micro_batch_size: 2 + val_at_start: false +checkpointing: + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + save_period: 50 +policy: + model_name: meta-llama/Llama-3.1-8B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + train_micro_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + tensor_parallel_size: 4 + dynamic_batching: + enabled: true + make_sequence_length_divisible_by: 1 + optimizer: + kwargs: + lr: 2.0e-05 + weight_decay: 0.01 + eps: 1.0e-08 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + tensorboard: + log_dir: tb_logs-sft-dev-squad +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml new file mode 100644 index 
0000000000..86db9da5e0 --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.yaml @@ -0,0 +1,41 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 10000 + val_period: 500 + val_batches: 4 + val_global_batch_size: 128 + val_micro_batch_size: 2 + val_at_start: false +checkpointing: + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + save_period: 100 +policy: + model_name: meta-llama/Llama-3.1-8B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + train_micro_batch_size: 2 + max_total_sequence_length: 4096 + make_sequence_length_divisible_by: 1 + optimizer: + kwargs: + lr: 2.0e-05 + weight_decay: 0.01 + eps: 1.0e-08 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long + tensorboard: + log_dir: tb_logs-sft-dev-squad +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml new file mode 100644 index 0000000000..31b7538c1c --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.yaml @@ -0,0 +1,39 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 350 + val_period: 500 +checkpointing: + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2 + save_period: 20 +policy: + model_name: meta-llama/Llama-3.1-8B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + train_micro_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + tensor_parallel_size: 2 + make_sequence_length_divisible_by: 2 + optimizer: + kwargs: + lr: 2.0e-05 + weight_decay: 0.01 + eps: 1.0e-08 +data: + dataset_name: 
openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2 + wandb: + project: nemo-rl + name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp2 + tensorboard: + log_dir: tb_logs-sft-dev-openmathinstruct2 +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml new file mode 100644 index 0000000000..5deed14cb4 --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.yaml @@ -0,0 +1,48 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 250 + val_period: 500 +checkpointing: + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-megatron + save_period: 50 +policy: + model_name: meta-llama/Llama-3.1-8B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + train_micro_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + sequence_packing: + enabled: true + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 2 + optimizer: + lr: 2.0e-05 + min_lr: 1.99999e-05 + weight_decay: 0.01 + bf16: true + scheduler: + lr_warmup_init: 1.9999e-05 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.1-8b-1n8g-megatron + wandb: + project: nemo-rl + name: sft-llama3.1-8b-1n8g-megatron + tensorboard: + log_dir: tb_logs-sft-dev-openmathinstruct2 +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml 
b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml new file mode 100644 index 0000000000..daf5cd5393 --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml @@ -0,0 +1,46 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 250 + val_period: 500 +checkpointing: + checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-megatron + save_period: 100 +policy: + model_name: meta-llama/Llama-3.1-8B + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + chat_template: default + train_global_batch_size: 512 + train_micro_batch_size: 2 + max_total_sequence_length: 4096 + dtensor_cfg: + enabled: false + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + tensor_model_parallel_size: 2 + pipeline_model_parallel_size: 2 + optimizer: + lr: 2.0e-05 + min_lr: 1.99999e-05 + weight_decay: 0.01 + bf16: true + scheduler: + lr_warmup_init: 1.9999e-05 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.1-8b-1n8g-megatron + wandb: + project: nemo-rl + name: sft-llama3.1-8b-1n8g-megatron + tensorboard: + log_dir: tb_logs-sft-dev-openmathinstruct2 +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml deleted file mode 100644 index 19fbd99562..0000000000 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.yaml +++ /dev/null @@ -1,74 +0,0 @@ -sft: - max_num_epochs: 1 - max_num_steps: 2730 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: true - checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long - metric_name: val_loss - 
higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: meta-llama/Llama-3.1-8B-Instruct - tokenizer: - name: meta-llama/Llama-3.1-8B-Instruct - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 1 - max_total_sequence_length: 1024 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: false - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.1 - betas: - - 0.9 - - 0.98 - eps: 1e-05 - foreach: false - fused: false -data: - max_input_seq_length: 1024 - dataset_name: squad - add_bos: true - add_eos: true - add_generation_prompt: false -logger: - log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long - tensorboard: - log_dir: tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml deleted file mode 100644 index 8f37dc440e..0000000000 --- 
a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.yaml +++ /dev/null @@ -1,74 +0,0 @@ -sft: - max_num_epochs: 1 - max_num_steps: 350 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: true - checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp - metric_name: val_loss - higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: meta-llama/Llama-3.1-8B-Instruct - tokenizer: - name: meta-llama/Llama-3.1-8B-Instruct - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 1 - max_total_sequence_length: 1024 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: true - activation_checkpointing: false - tensor_parallel_size: 2 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: false - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 2 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.1 - betas: - - 0.9 - - 0.98 - eps: 1e-05 - foreach: false - fused: false -data: - max_input_seq_length: 1024 - dataset_name: squad - add_bos: true - add_eos: true - add_generation_prompt: false -logger: - log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp - tensorboard: - log_dir: 
tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml b/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml deleted file mode 100644 index ec72f8f454..0000000000 --- a/examples/configs/recipes/llm/sft-llama3.1-8b-instruct-1n8g-megatron.yaml +++ /dev/null @@ -1,118 +0,0 @@ -sft: - max_num_epochs: 1 - max_num_steps: 250 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: false #true - checkpoint_dir: results/sft-llama3.1-8b-instruct-1n8g-fsdp1 - metric_name: val_loss - higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: meta-llama/Llama-3.1-8B-Instruct - tokenizer: - name: meta-llama/Llama-3.1-8B-Instruct - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 2 - max_total_sequence_length: 1024 - precision: bfloat16 - dtensor_cfg: - enabled: false - dynamic_batching: - enabled: false - sequence_packing: - enabled: false - make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} - max_grad_norm: 1 - optimizer: null - megatron_cfg: - enabled: true - empty_unused_memory_level: 1 - activation_checkpointing: false - tensor_model_parallel_size: 2 - expert_tensor_parallel_size: 1 - expert_model_parallel_size: 1 - pipeline_model_parallel_size: 2 - context_parallel_size: 1 - pipeline_dtype: ${policy.precision} - num_layers_in_first_pipeline_stage: null - num_layers_in_last_pipeline_stage: null - 
sequence_parallel: false - freeze_moe_router: false - moe_router_dtype: null - moe_router_load_balancing_type: "aux_loss" - moe_router_bias_update_rate: 1e-3 - #gives ~20% training perf speedup with sequence packing - apply_rope_fusion: True - - optimizer: - optimizer: "adam" - lr: 5.0e-6 - min_lr: 4.9999e-6 - weight_decay: 0.1 - bf16: false - fp16: false - params_dtype: "float32" - - #adam - adam_beta1: 0.9 - adam_beta2: 0.98 - adam_eps: 1e-5 - - #sgd - sgd_momentum: 0.9 - - #distributed optimizer - use_distributed_optimizer: true - use_precision_aware_optimizer: true - - clip_grad: ${policy.max_grad_norm} - - scheduler: - start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} - weight_decay_incr_style: "constant" - lr_decay_style: "constant" - lr_decay_iters: null - lr_warmup_iters: 50 - lr_warmup_init: 4.9999e-6 - - distributed_data_parallel_config: - grad_reduce_in_fp32: false - overlap_grad_reduce: true - overlap_param_gather: true - average_in_collective: true - data_parallel_sharding_strategy: "optim_grads_params" - - -data: - add_generation_prompt: false - max_input_seq_length: 1024 - dataset_name: squad - add_bos: true - add_eos: true -logger: - log_dir: logs/sft-llama3.1-8b-instruct-1n8g-fsdp1 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: sft-llama3.1-8b-instruct-1n8g-fsdp1 - tensorboard: - log_dir: tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml deleted file mode 100644 index 685990ab98..0000000000 --- a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.yaml +++ 
/dev/null @@ -1,74 +0,0 @@ -sft: - max_num_epochs: 1 - max_num_steps: 500 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: true - checkpoint_dir: results/sft-llama3.2-1b-1n8g-fsdp2tp1 - metric_name: val_loss - higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: meta-llama/Llama-3.2-1B - tokenizer: - name: meta-llama/Llama-3.2-1B - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 1 - max_total_sequence_length: 1024 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: false - activation_checkpointing: false - tensor_parallel_size: 1 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: false - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 1 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.1 - betas: - - 0.9 - - 0.98 - eps: 1e-05 - foreach: false - fused: false -data: - max_input_seq_length: 1024 - dataset_name: squad - add_bos: true - add_eos: true - add_generation_prompt: false -logger: - log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: sft-llama3.2-1b-1n8g-fsdp2tp1 - tensorboard: - log_dir: tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 1 diff --git 
a/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml new file mode 100644 index 0000000000..77ff8aac89 --- /dev/null +++ b/examples/configs/recipes/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.yaml @@ -0,0 +1,26 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 500 +checkpointing: + checkpoint_dir: results/sft-llama3.2-1b-1n8g-fsdp2tp1 + save_period: 100 +policy: + tokenizer: + name: meta-llama/Llama-3.2-1B + make_sequence_length_divisible_by: 1 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + seed: 42 +logger: + log_dir: logs/sft-llama3.2-1b-1n8g-fsdp2tp1 + wandb: + project: nemo-rl + name: sft-llama3.2-1b-1n8g-fsdp2tp1 + tensorboard: + log_dir: tb_logs-sft-dev-openmathinstruct2 +cluster: + gpus_per_node: 8 diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml deleted file mode 100644 index 2ebadb9670..0000000000 --- a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.yaml +++ /dev/null @@ -1,74 +0,0 @@ -sft: - max_num_epochs: 1 - max_num_steps: 20 - val_period: 10 - val_batches: 8 - val_global_batch_size: 32 - val_micro_batch_size: 1 - val_at_start: true - seed: 42 -checkpointing: - enabled: true - checkpoint_dir: results/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt - metric_name: val_loss - higher_is_better: false - keep_top_k: 3 - save_period: 10 -policy: - model_name: Qwen/Qwen2.5-32B - tokenizer: - name: Qwen/Qwen2.5-32B - chat_template: '{% for message in messages %}{%- if message[''role''] == ''system'' %}{{''Context: '' + message[''content''].strip()}}{%- elif message[''role''] == ''user'' %}{{'' Question: '' + message[''content''].strip() + '' Answer:''}}{%- elif message[''role''] == ''assistant'' %}{{'' '' + 
message[''content''].strip()}}{%- endif %}{% endfor %}' - train_global_batch_size: 32 - train_micro_batch_size: 1 - max_total_sequence_length: 16000 - precision: bfloat16 - dtensor_cfg: - enabled: true - cpu_offload: false - sequence_parallel: true - activation_checkpointing: true - tensor_parallel_size: 8 - context_parallel_size: 1 - custom_parallel_plan: null - dynamic_batching: - enabled: false - sequence_packing: - enabled: false - make_sequence_length_divisible_by: 8 - max_grad_norm: 1 - optimizer: - name: torch.optim.AdamW - kwargs: - lr: 5e-06 - weight_decay: 0.1 - betas: - - 0.9 - - 0.98 - eps: 1e-05 - foreach: false - fused: false -data: - max_input_seq_length: 16000 - dataset_name: squad - add_bos: true - add_eos: true - add_generation_prompt: false -logger: - log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt - wandb_enabled: true - tensorboard_enabled: true - mlflow_enabled: false - monitor_gpus: true - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal - wandb: - project: nemo-rl - name: sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt - tensorboard: - log_dir: tb_logs-sft-dev-squad - gpu_monitoring: - collection_interval: 10 - flush_interval: 10 -cluster: - gpus_per_node: 8 - num_nodes: 4 diff --git a/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml new file mode 100644 index 0000000000..c94683c61f --- /dev/null +++ b/examples/configs/recipes/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.yaml @@ -0,0 +1,32 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 20 +checkpointing: + checkpoint_dir: results/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + save_period: 100 +policy: + model_name: Qwen/Qwen2.5-32B + tokenizer: + name: Qwen/Qwen2.5-32B + max_total_sequence_length: 16000 + dtensor_cfg: + sequence_parallel: true + activation_checkpointing: true + tensor_parallel_size: 8 + make_sequence_length_divisible_by: 8 +data: + 
dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution +logger: + log_dir: logs/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + wandb: + project: nemo-rl + name: sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt + tensorboard: + log_dir: tb_logs-sft-dev-openmathinstruct2 +cluster: + gpus_per_node: 8 + num_nodes: 4 diff --git a/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml new file mode 100644 index 0000000000..151319df3a --- /dev/null +++ b/examples/configs/recipes/llm/sft-qwen2.5-math7b-2n8g-megatron.yaml @@ -0,0 +1,53 @@ +defaults: ../../sft.yaml +sft: + max_num_steps: 80 +checkpointing: + enabled: false +policy: + model_name: Qwen/Qwen2.5-Math-7B + train_global_batch_size: 512 + max_total_sequence_length: 16384 + dtensor_cfg: + enabled: false + megatron_cfg: + enabled: true + tensor_model_parallel_size: 4 + context_parallel_size: 2 + sequence_parallel: true + freeze_moe_router: true + moe_router_dtype: fp64 + moe_router_bias_update_rate: 0.0 + moe_permute_fusion: true + optimizer: + lr: 1.0e-06 + min_lr: 1.0e-06 + bf16: true + adam_beta2: 0.999 + adam_eps: 1.0e-08 + use_distributed_optimizer: false + use_precision_aware_optimizer: false + scheduler: + lr_decay_iters: null + lr_warmup_iters: 10 + lr_warmup_init: 1.0e-11 + sequence_packing: + enabled: true + make_sequence_length_divisible_by: 32 +data: + dataset_name: openmathinstruct2 + prompt_file: examples/prompts/math.txt + split: train_1M + add_generation_prompt: true + output_key: generated_solution + num_workers: 8 +logger: + wandb: + project: nemo-rl + name: sft-qwen2.5-math-7b-megatron + tensorboard: + log_dir: tb_logs-sft-qwen2.5-math-7b-megatron + mlflow: + run_name: sft-qwen2.5-math-7b-megatron +cluster: + gpus_per_node: 8 + num_nodes: 2 diff --git 
a/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.yaml b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.yaml new file mode 100644 index 0000000000..a5da6ed98f --- /dev/null +++ b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.yaml @@ -0,0 +1,5 @@ +defaults: ../../vlm_grpo_3B.yaml +checkpointing: + checkpoint_dir: results/clevr_grpo +policy: + max_total_sequence_length: 3072 diff --git a/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.yaml b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.yaml new file mode 100644 index 0000000000..f4dc467976 --- /dev/null +++ b/examples/configs/recipes/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.yaml @@ -0,0 +1,24 @@ +defaults: ../../vlm_grpo_3B.yaml +checkpointing: + checkpoint_dir: results/clevr_grpo +policy: + max_total_sequence_length: 3072 + dtensor_cfg: + enabled: false + dynamic_batching: + enabled: false + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + optimizer: null + megatron_cfg: + enabled: true + optimizer: + lr: 5.0e-07 + min_lr: 5.0e-08 + scheduler: + lr_warmup_iters: 50 + lr_warmup_init: 5.0e-08 + distributed_data_parallel_config: + overlap_grad_reduce: false +logger: + wandb: + name: vlm-grpo-3b-megatron diff --git a/examples/configs/recipes/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.yaml.disabled b/examples/configs/recipes/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.yaml.disabled new file mode 100644 index 0000000000..15ef079582 --- /dev/null +++ b/examples/configs/recipes/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.yaml.disabled @@ -0,0 +1,6 @@ +defaults: ../../vlm_grpo_3B.yaml +checkpointing: + checkpoint_dir: results/clevr_grpo +policy: + model_name: HuggingFaceTB/SmolVLM2-2.2B-Instruct + 
max_total_sequence_length: 3072 diff --git a/examples/configs/rm.yaml b/examples/configs/rm.yaml new file mode 100644 index 0000000000..2ea4dbd6e9 --- /dev/null +++ b/examples/configs/rm.yaml @@ -0,0 +1,182 @@ +# Bradley-Terry (BT) Reward Model Training Configuration +rm: + ## total number of steps to train will equal + ## min((max_num_epochs * len(train_dataloader)), max_num_steps) + max_num_epochs: 1 + max_num_steps: -1 # by default, train for 1 epoch + + val_period: 16 + val_batches: -1 + val_global_batch_size: 32 + val_micro_batch_size: 1 + val_at_start: false + seed: 42 + +checkpointing: + enabled: true + checkpoint_dir: "results/rm" + metric_name: "val:validation-default_loss" # one of "val:" or "train:" followed by the metric name + higher_is_better: false + keep_top_k: 3 + save_period: ${rm.val_period} + checkpoint_must_save_by: null + +policy: + model_name: "meta-llama/Llama-3.2-1B-Instruct" + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + # We don't use the "default" chat template because the Llama tokenizer inserts the current + # date in the system prompt, which could make the reward model's output date-dependent. + chat_template: "{{- bos_token }}\n\n{#- This block extracts the system message, so we can slot it into the right place. 
#}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = '' %}\n{%- endif %}\n\n{#- System message #}\n{{- '<|start_header_id|>system<|end_header_id|>\n\n' }}\n{{- system_message }}\n{{- '<|eot_id|>' }}\n\n{%- for message in messages %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }}\n{%- endif %}" + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true + train_global_batch_size: 128 + train_micro_batch_size: 1 + max_total_sequence_length: 8192 + precision: "bfloat16" + activation_checkpointing_enabled: false + + offload_optimizer_for_logprob: false + + reward_model_cfg: + enabled: true # loads model as a Reward Model (do not change) + reward_model_type: "bradley_terry" # only "bradley_terry" is currently supported + + dtensor_cfg: + enabled: true + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + context_parallel_size: 1 + custom_parallel_plan: null + + dynamic_batching: + enabled: false + + sequence_packing: + enabled: false + + # makes the training sequence length divisible by the tensor parallel size + # this is useful for sequence parallel training + make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} + max_grad_norm: 1.0 + + optimizer: + name: "torch.optim.AdamW" + kwargs: + lr: 2.0e-6 + weight_decay: 0.1 + betas: [0.9, 0.98] + eps: 1e-5 + # when using Dtensor, we need to set `foreach` and `fused` to false + foreach: false + fused: false + + ## ignored since enabled=false, but needed for testing purposes + megatron_cfg: + enabled: false + empty_unused_memory_level: 1 + activation_checkpointing: false + tensor_model_parallel_size: 
2 + pipeline_model_parallel_size: 2 + context_parallel_size: 1 + pipeline_dtype: ${policy.precision} + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + sequence_parallel: false + + optimizer: + optimizer: "adam" + lr: 2.0e-6 + min_lr: 1.9999e-6 + weight_decay: 0.1 + bf16: false + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.98 + adam_eps: 1e-5 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + clip_grad: ${policy.max_grad_norm} + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: 1000 + lr_warmup_iters: 50 + lr_warmup_init: 1.9999e-6 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: false + data_parallel_sharding_strategy: "optim_grads_params" + + +data: + max_input_seq_length: ${policy.max_total_sequence_length} + shuffle: true + num_workers: 1 + + dataset_name: HelpSteer3 + # You can use custom preference datasets for training and validation. For example: + # 1. PreferenceDataset + # data: + # dataset_name: PreferenceDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_paths: + # <NameOfValidationDataset1>: <PathToValidationDataset1> + # ... + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # 2. 
BinaryPreferenceDataset + # data: + # dataset_name: BinaryPreferenceDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_path: <PathToValidationDataset> + # prompt_key: <PromptKey>, default is "prompt" + # chosen_key: <ChosenKey>, default is "chosen" + # rejected_key: <RejectedKey>, default is "rejected" + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/rm.md#datasets for more details. + + # If you are doing checkpointing, `metric_name` should reflect the metric and validation set to be tracked. For example: + # checkpointing: + # metric_name: "validation-<NameOfValidationDataset1>_loss" + # ... + +logger: + log_dir: "logs" # Base directory for all logs + wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running + tensorboard_enabled: true + mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging + monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard + wandb: + project: "rm-dev" + name: "rm-dev-${data.dataset_name}" + tensorboard: + log_dir: "tb_logs-rm-dev-${data.dataset_name}" + gpu_monitoring: + collection_interval: 10 # How often to collect GPU usage metrics (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + +cluster: + gpus_per_node: 1 + num_nodes: 1 diff --git a/examples/configs/sft.yaml b/examples/configs/sft.yaml index a592321cfe..2482678374 100644 --- a/examples/configs/sft.yaml +++ b/examples/configs/sft.yaml @@ -15,23 +15,29 @@ sft: checkpointing: enabled: true checkpoint_dir: "results/sft" - metric_name: "val_loss" ## set to null to save most recent k checkpoints + metric_name: "val:val_loss" # one of "val:" or "train:" followed by the metric name higher_is_better: 
false keep_top_k: 3 save_period: 10 + checkpoint_must_save_by: null policy: model_name: "meta-llama/Llama-3.2-1B" tokenizer: name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + # chat_template can be a Jinja template string or path to a .jinja file chat_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true train_global_batch_size: 32 train_micro_batch_size: 1 max_total_sequence_length: 1024 precision: "bfloat16" + offload_optimizer_for_logprob: false + dtensor_cfg: enabled: true + env_vars: {} cpu_offload: False sequence_parallel: false activation_checkpointing: false @@ -41,6 +47,8 @@ policy: dynamic_batching: enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + sequence_length_round: 64 sequence_packing: enabled: False @@ -68,6 +76,7 @@ policy: ## ignored since enabled=false, but needed for testing purposes megatron_cfg: enabled: false + env_vars: {} empty_unused_memory_level: 1 activation_checkpointing: false tensor_model_parallel_size: 1 @@ -83,8 +92,12 @@ policy: moe_router_dtype: null moe_router_load_balancing_type: "aux_loss" moe_router_bias_update_rate: 1e-3 + moe_permute_fusion: false #gives ~20% training perf speedup with sequence packing - apply_rope_fusion: True + apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: False optimizer: optimizer: "adam" @@ -109,12 +122,16 @@ policy: clip_grad: ${policy.max_grad_norm} + # optimizer cpu offload + optimizer_cpu_offload: false 
+ optimizer_offload_fraction: 0.0 + scheduler: start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} weight_decay_incr_style: "constant" lr_decay_style: "constant" - lr_decay_iters: null + lr_decay_iters: 1000 lr_warmup_iters: 50 lr_warmup_init: 4.9999e-6 @@ -122,24 +139,52 @@ policy: grad_reduce_in_fp32: false overlap_grad_reduce: true overlap_param_gather: true - average_in_collective: true data_parallel_sharding_strategy: "optim_grads_params" - + use_custom_fsdp: false data: max_input_seq_length: ${policy.max_total_sequence_length} - dataset_name: "squad" add_bos: true add_eos: true add_generation_prompt: false + shuffle: true + num_workers: 1 + + dataset_name: "squad" + # You can use custom response datasets for training and validation. For example: + # data: + # dataset_name: ResponseDataset + # train_data_path: <PathToTrainingDataset> # e.g., /path/to/local/dataset.jsonl or hf_org/hf_dataset_name (HuggingFace) + # val_data_path: <PathToValidationDataset> + # input_key: <QuestionKey>, default is "input" + # output_key: <AnswerKey>, default is "output" + # train_split: <TrainSplit>, default is None # used for HuggingFace datasets + # val_split: <ValSplit>, default is None # used for HuggingFace datasets + # See https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/sft.md#datasets for more details. 
+ + ## unused with squad dataset + prompt_file: null + split: null + output_key: null + seed: null + + + ## OpenAI format specific configs + # train_data_path: "/path/to/train.jsonl" # Path to training data + # val_data_path: "/path/to/val.jsonl" # Path to validation data + # chat_key: "messages" # Key for messages in the data + # system_key: null # Key for system message (optional) + # system_prompt: null # Default system prompt (optional) + # tool_key: "tools" # Key for tools in the data + # use_preserving_dataset: false # If true, uses PreservingDataset to preserve heterogeneous schemas (e.g., tool calls with varying argument structures) logger: log_dir: "logs" # Base directory for all logs wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: "sft-dev" name: "sft-dev-${data.dataset_name}" diff --git a/examples/configs/sft_openmathinstruct2.yaml b/examples/configs/sft_openmathinstruct2.yaml index 1f1b88a8a9..8e8df3dc66 100644 --- a/examples/configs/sft_openmathinstruct2.yaml +++ b/examples/configs/sft_openmathinstruct2.yaml @@ -12,10 +12,11 @@ sft: checkpointing: enabled: true checkpoint_dir: "results/sft_openmathinstruct2" - metric_name: "val_loss" + metric_name: "val:val_loss" # one of "val:" or "train:" followed by the metric name higher_is_better: false keep_top_k: 100 save_period: 500 + checkpoint_must_save_by: null policy: model_name: "meta-llama/Llama-3.1-8B" @@ -26,6 +27,8 @@ policy: max_total_sequence_length: 4096 precision: "bfloat16" + offload_optimizer_for_logprob: false + dtensor_cfg: enabled: true cpu_offload: False @@ -35,11 +38,17 @@ policy: context_parallel_size: 1 custom_parallel_plan: null + megatron_cfg: + enabled: false + 
dynamic_batching: enabled: false sequence_packing: enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 # makes the training sequence length divisible by the tensor parallel size # this is useful for sequence parallel training @@ -67,14 +76,15 @@ data: add_eos: true add_generation_prompt: true output_key: 'generated_solution' + shuffle: true logger: log_dir: "logs" # Base directory for all logs wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running tensorboard_enabled: true mlflow_enabled: false + swanlab_enabled: false # Disable SwanLab logging monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard - num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal wandb: project: "sft-dev" name: "openmathinstruct-nemorl-1M_train" diff --git a/examples/configs/sft_openmathinstruct2_megatron.yaml b/examples/configs/sft_openmathinstruct2_megatron.yaml new file mode 100644 index 0000000000..7e5936f183 --- /dev/null +++ b/examples/configs/sft_openmathinstruct2_megatron.yaml @@ -0,0 +1,158 @@ +# SFT Algorithm Configuration +defaults: sft_openmathinstruct2.yaml + +sft: + max_num_epochs: 1 + max_num_steps: 1000000 + val_period: 500 + val_batches: 4 + val_global_batch_size: 128 + val_micro_batch_size: 1 + val_at_start: true + seed: 42 + +checkpointing: + enabled: true + checkpoint_dir: "results/sft_openmathinstruct2" + metric_name: "val:val_loss" # one of "val:" or "train:" followed by the metric name + higher_is_better: false + keep_top_k: 100 + save_period: 500 + +policy: + model_name: "meta-llama/Llama-3.1-8B" + tokenizer: + name: meta-llama/Llama-3.1-8B-Instruct + train_global_batch_size: 512 + train_micro_batch_size: 1 + max_total_sequence_length: 4096 + precision: "bfloat16" + + dtensor_cfg: + enabled: false + + megatron_cfg: + 
activation_checkpointing: false + context_parallel_size: 1 + distributed_data_parallel_config: + data_parallel_sharding_strategy: optim_grads_params + grad_reduce_in_fp32: true + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + empty_unused_memory_level: 1 + enabled: true + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + optimizer: + adam_beta1: 0.9 + adam_beta2: 0.98 + adam_eps: 1.0e-8 + bf16: true + clip_grad: 0 + fp16: false + lr: 0.00002 + min_lr: 0.00002 + optimizer: adam + params_dtype: bfloat16 + sgd_momentum: 0.9 + use_distributed_optimizer: true + use_precision_aware_optimizer: false #true ## TODO: precision aware optim not working with fp8. Is this expected? + weight_decay: 0.01 + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + ## recently introduced, our current mcore commit doesn't have this + #fp8_recipe: delayed + + pipeline_dtype: bfloat16 + pipeline_model_parallel_size: 1 + scheduler: + end_weight_decay: 0.01 + lr_decay_iters: 1000 + lr_decay_style: constant + lr_warmup_init: 0.00001999999 + lr_warmup_iters: 1 + start_weight_decay: 0.01 + weight_decay_incr_style: constant + sequence_parallel: false + tensor_model_parallel_size: 4 ## TODO: should not need this large TP size + + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + + env_vars: + PYTORCH_CUDA_ALLOC_CONF: "expandable_segments:False" + + ## fp8 training currently not supported + #fp8_cfg: + # enabled: true + # fp8: 
hybrid + # fp8_recipe: delayed + # fp8_param: true # false gives the following error: "RuntimeError: /TransformerEngine/transformer_engine/common/gemm/cublaslt_gemm.cu:116 in function CanonicalizeGemmInput: Assertion failed: !is_fp8_dtype(ret.Atype). Input A is missing column-wise usage" + # fp8_dot_product_attention: false #true + # fp8_multi_head_attention: false #true + + dynamic_batching: + enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + sequence_length_round: 64 + + + sequence_packing: + enabled: True + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + # makes the training sequence length divisible by the tensor parallel size + # this is useful for sequence parallel training + make_sequence_length_divisible_by: ${mul:16, ${policy.megatron_cfg.tensor_model_parallel_size}} + max_grad_norm: null + + optimizer: null + +data: + max_input_seq_length: ${policy.max_total_sequence_length} + dataset_name: "openmathinstruct2" + prompt_file: examples/prompts/math.txt + split: "train_1M" + add_bos: true + add_eos: true + add_generation_prompt: true + output_key: 'generated_solution' + num_workers: 1 + +logger: + log_dir: "logs" # Base directory for all logs + wandb_enabled: true # Make sure you do a ``wandb login [Your API key]'' before running + tensorboard_enabled: true + mlflow_enabled: false + monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard + wandb: + project: "sft-openmathinstruct-megatron" + name: "llama8b" + tensorboard: + log_dir: "tb_logs-openmathinstruct-nemorl-1M_train" + mlflow: + experiment_name: "sft-dev" + run_name: "openmathinstruct-nemorl-1M_train" + gpu_monitoring: + collection_interval: 10 # How often to collect GPU usage metrics (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + 
+cluster: + gpus_per_node: 8 + num_nodes: 2 + diff --git a/examples/configs/sft_vlm_3B.yaml b/examples/configs/sft_vlm_3B.yaml new file mode 100644 index 0000000000..5615e2f99d --- /dev/null +++ b/examples/configs/sft_vlm_3B.yaml @@ -0,0 +1,49 @@ +defaults: + - sft.yaml + +policy: + model_name: "Qwen/Qwen2.5-VL-3B-Instruct" + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + train_global_batch_size: 16 + train_micro_batch_size: 1 + max_total_sequence_length: 1024 + precision: "bfloat16" + + sequence_packing: + enabled: False + +checkpointing: + enabled: true + checkpoint_dir: "results/sft_${policy.model_name}" + metric_name: "val:val_loss" # one of "val:" or "train:" followed by the metric name + higher_is_better: false + keep_top_k: 1 + save_period: 10 + +data: + max_input_seq_length: ${policy.max_total_sequence_length} + dataset_name: "clevr_cogent" + add_bos: true + add_eos: true + add_generation_prompt: false + split: trainA + prompt_file: null + +logger: + log_dir: "logs" # Base directory for all logs + wandb_enabled: false # Make sure you do a ``wandb login [Your API key]'' before running + tensorboard_enabled: true + monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard + wandb: + project: "sft-dev" + name: "sft-dev-${data.dataset_name}" + tensorboard: + log_dir: "tb_logs-sft-dev-${data.dataset_name}" + gpu_monitoring: + collection_interval: 10 # How often to collect GPU usage metrics (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + +cluster: + gpus_per_node: 2 + num_nodes: 1 diff --git a/examples/configs/vlm_grpo_3B.yaml b/examples/configs/vlm_grpo_3B.yaml new file mode 100644 index 0000000000..4e21205491 --- /dev/null +++ b/examples/configs/vlm_grpo_3B.yaml @@ -0,0 +1,278 @@ +# GRPO Algorithm Configuration +# Example script for running GRPO on Qwen2.5-VL-3B-Instruct +grpo: + num_prompts_per_step: 8
+ num_generations_per_prompt: 16 + max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question) + max_num_epochs: 1 + max_num_steps: 1000000 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + overlong_filtering: false + max_val_samples: 256 + val_batch_size: 256 + seed: 42 + use_dynamic_sampling: false + batch_multiplier: 1 + reward_shaping: + enabled: false + overlong_buffer_length: 512 + overlong_buffer_penalty: 1 + max_response_length: ${policy.max_total_sequence_length} + reward_scaling: + enabled: false + source_min: 0.0 + source_max: 1.0 + target_min: 0.0 + target_max: 1.0 + async_grpo: + enabled: false + max_trajectory_age_steps: 1 + +loss_fn: + reference_policy_kl_penalty: 0.01 + # Can be set to k1, k2, k3 + # For more details, see http://joschu.net/blog/kl-approx.html + reference_policy_kl_type: "k3" + kl_input_clamp_value: 20.0 + kl_output_clamp_value: 10.0 + ratio_clip_min: 0.2 + ratio_clip_max: 0.2 + ratio_clip_c: null + # (default off) loss formulation improvements (docs/guides/grpo.md#loss) + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false + truncated_importance_sampling_ratio: null + token_level_loss: true + +checkpointing: + enabled: true + checkpoint_dir: "results/clevr_grpo_${policy.model_name}" + metric_name: "val:accuracy" # one of "val:" or "train:" followed by the metric name + higher_is_better: true + keep_top_k: 3 + save_period: 10 + checkpoint_must_save_by: null + +policy: + model_name: "Qwen/Qwen2.5-VL-3B-Instruct" + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + train_global_batch_size: 128 + train_micro_batch_size: 1 + generation_batch_size: 32 # Only used when generating using HF backend + logprob_batch_size: 4 + max_total_sequence_length: 2048 + precision: "bfloat16" + offload_optimizer_for_logprob: false # Only useful for 
non-colocated generation since colocated generation will always offload optimizer to cuda before refit + + dtensor_cfg: + _v2: true + enabled: true + cpu_offload: False + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + context_parallel_size: 1 + custom_parallel_plan: null + + megatron_cfg: + enabled: false + empty_unused_memory_level: 1 # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory. + activation_checkpointing: false + converter_type: "Qwen2ForCausalLM" + tensor_model_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 1 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + moe_permute_fusion: false + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: False + + optimizer: + optimizer: "adam" + lr: 5.0e-6 + min_lr: 5.0e-7 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + clip_grad: ${policy.max_grad_norm} + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: 
${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: 1000 + lr_warmup_iters: 13 + lr_warmup_init: 5.0e-7 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + + # dynamic_batching improves performance by ensuring logprob and training microbatches + # have a sufficent number of tokens to maximize GPU utilization. Specifically, variable length + # responses are sorted by sequence length and bucketed into microbatches with a total + # amount of tokens is approximately close to 'train_mb_tokens' and 'logprob_mb_tokens' for the + # training and logprob stages respectively. + dynamic_batching: + enabled: True + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 + + # makes the training sequence length divisible by the tensor parallel size + # this is useful for sequence parallel training + make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} + max_grad_norm: 1.0 + + sequence_packing: + enabled: False + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + optimizer: + name: "torch.optim.AdamW" + kwargs: + lr: 5e-7 + weight_decay: 0.01 + betas: [0.9, 0.999] + eps: 1e-8 + # when using Dtensor, we need to set foreach + # and fused to False + foreach: False + fused: False + + scheduler: + - name: "torch.optim.lr_scheduler.LinearLR" + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 50 + - name: "torch.optim.lr_scheduler.ConstantLR" + kwargs: 
+ factor: 1.0 + total_iters: 10000000000 + - milestones: [50] + + generation: + backend: "vllm" + # max_new_tokens: ${policy.max_total_sequence_length} + max_new_tokens: 1024 + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + async_engine: false # Only for internal testing, will be enabled by https://github.com/NVIDIA/NeMo-RL/issues/447. + precision: ${policy.precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + expert_parallel_size: 1 + gpu_memory_utilization: 0.6 + max_model_len: ${policy.max_total_sequence_length} + enforce_eager: False + # VLMs require tokenizer to be initialized before generation, so we set skip_tokenizer_init to False. + skip_tokenizer_init: False + colocated: + # true: generation shares training GPUs + # false: uses dedicated generation resources + enabled: true + # only relevant when enabled is false + resources: + gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 + num_nodes: null # Decides number of nodes to be dedicated to generation + +data: + max_input_seq_length: ${policy.max_total_sequence_length} # upper bound, real truncation occurs at vllm.max_model_len + prompt_file: "examples/prompts/clevr_cogent_cot.txt" + system_prompt_file: null + dataset_name: "clevr-cogent" + split: "trainA" + shuffle: true + num_workers: 1 + +env: + clevr-cogent: + num_workers: 8 + reward_functions: + - name: format + weight: 0.2 + - name: exact_alnum + weight: 0.8 + geometry3k: + num_workers: 8 + reward_functions: + - name: format + weight: 0.1 + - name: math_expr + weight: 0.9 + refcoco: + num_workers: 8 + reward_functions: + - name: format + weight: 0.1 + - name: bbox_giou + weight: 0.9 + kwargs: + giou_penalty_thres: 0.5 + +logger: + log_dir: "logs" # Base directory for all logs + num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal + wandb_enabled: false + 
tensorboard_enabled: true + swanlab_enabled: false # Disable SwanLab logging + mlflow_enabled: false # Disable MLflow logging + monitor_gpus: false # If true, will monitor GPU usage and log to wandb and/or tensorboard + wandb: + project: "grpo-dev" + name: "grpo-dev-logger" + tensorboard: {} + gpu_monitoring: + collection_interval: 10 # How often to collect GPU usage metrics (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + +cluster: + gpus_per_node: 2 + num_nodes: 1 diff --git a/examples/configs/vlm_grpo_3B_megatron.yaml b/examples/configs/vlm_grpo_3B_megatron.yaml new file mode 100644 index 0000000000..dd206d75ac --- /dev/null +++ b/examples/configs/vlm_grpo_3B_megatron.yaml @@ -0,0 +1,227 @@ +grpo: + num_prompts_per_step: 8 + num_generations_per_prompt: 16 + max_rollout_turns: 1 + max_num_epochs: 1 + max_num_steps: 1000000 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: false + overlong_filtering: false + max_val_samples: 256 + val_batch_size: 256 + seed: 42 + use_dynamic_sampling: false + batch_multiplier: 1 + reward_shaping: + enabled: false + overlong_buffer_length: 512 + overlong_buffer_penalty: 1 + max_response_length: ${policy.max_total_sequence_length} + reward_scaling: + enabled: false + source_min: 0.0 + source_max: 1.0 + target_min: 0.0 + target_max: 1.0 + async_grpo: + enabled: false + max_trajectory_age_steps: 1 +loss_fn: + reference_policy_kl_penalty: 0.01 + # Can be set to k1, k2, k3 + # For more details, see http://joschu.net/blog/kl-approx.html + reference_policy_kl_type: "k3" + kl_input_clamp_value: 20.0 + kl_output_clamp_value: 10.0 + ratio_clip_min: 0.2 + ratio_clip_max: 0.2 + ratio_clip_c: null + use_on_policy_kl_approximation: false + use_importance_sampling_correction: false + truncated_importance_sampling_ratio: null + token_level_loss: true +checkpointing: + enabled: true + checkpoint_dir: results/clevr_grpo_${policy.model_name} + 
metric_name: val:accuracy # one of "val:" or "train:" followed by the metric name + higher_is_better: true + keep_top_k: 3 + save_period: 10 + checkpoint_must_save_by: null +policy: + model_name: Qwen/Qwen2.5-VL-3B-Instruct + tokenizer: + name: ${policy.model_name} + train_global_batch_size: 128 + train_micro_batch_size: 1 + generation_batch_size: 32 + logprob_batch_size: 4 + max_total_sequence_length: 2048 + precision: bfloat16 + offload_optimizer_for_logprob: false # Only useful for non-colocated generation since colocated generation will always offload optimizer to cuda before refit + dtensor_cfg: + _v2: true + enabled: false + cpu_offload: false + sequence_parallel: false + activation_checkpointing: false + tensor_parallel_size: 1 + context_parallel_size: 1 + custom_parallel_plan: null + dynamic_batching: + enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 + make_sequence_length_divisible_by: ${policy.megatron_cfg.tensor_model_parallel_size} + max_grad_norm: 1.0 + sequence_packing: + enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: modified_first_fit_decreasing + sequence_length_round: 64 + scheduler: + - name: torch.optim.lr_scheduler.LinearLR + kwargs: + start_factor: 0.1 + end_factor: 1.0 + total_iters: 50 + - name: torch.optim.lr_scheduler.ConstantLR + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: + - 50 + generation: + backend: vllm + max_new_tokens: 1024 + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + async_engine: false + precision: ${policy.precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + expert_parallel_size: 1 + 
gpu_memory_utilization: 0.6 + max_model_len: ${policy.max_total_sequence_length} + enforce_eager: false + enable_expert_parallel: false + # VLMs require tokenizer to be initialized before generation, so we set skip_tokenizer_init to False. + skip_tokenizer_init: False + colocated: + enabled: true + resources: + gpus_per_node: null + num_nodes: null + megatron_cfg: + enabled: true + empty_unused_memory_level: 1 # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. Setting to 0 is faster, but you are more likely to run out of GPU memory. + activation_checkpointing: false + converter_type: Qwen2ForCausalLM + tensor_model_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 1 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: fp64 + moe_router_load_balancing_type: none + moe_router_bias_update_rate: 0.0 + moe_permute_fusion: false + apply_rope_fusion: true + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: True + defer_fp32_logits: False + optimizer: + optimizer: adam + lr: 2.0e-07 + min_lr: 2.0e-07 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: float32 + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1.0e-08 + sgd_momentum: 0.9 + use_distributed_optimizer: true + use_precision_aware_optimizer: true + clip_grad: ${policy.max_grad_norm} + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: constant + lr_decay_style: constant + lr_decay_iters: 1000 + lr_warmup_iters: 50 + lr_warmup_init: 2.0e-08 + 
distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: false + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: optim_grads_params +data: + max_input_seq_length: ${policy.max_total_sequence_length} + prompt_file: examples/prompts/clevr_cogent_cot.txt + system_prompt_file: null + dataset_name: clevr-cogent + split: trainA + shuffle: true + num_workers: 1 +env: + clevr-cogent: + num_workers: 8 + reward_functions: + - name: format + weight: 0.2 + - name: exact_alnum + weight: 0.8 + geometry3k: + num_workers: 8 + reward_functions: + - name: format + weight: 0.1 + - name: math_expr + weight: 0.9 + refcoco: + num_workers: 8 + reward_functions: + - name: format + weight: 0.1 + - name: bbox_giou + weight: 0.9 + kwargs: + giou_penalty_thres: 0.5 +logger: + log_dir: logs + num_val_samples_to_print: 0 + wandb_enabled: false + tensorboard_enabled: true + swanlab_enabled: false + mlflow_enabled: false + monitor_gpus: false + wandb: + project: grpo-dev + name: vlm-grpo-3b-megatron + tensorboard: {} + gpu_monitoring: + collection_interval: 10 + flush_interval: 10 +cluster: + gpus_per_node: 2 + num_nodes: 1 diff --git a/examples/converters/convert_dcp_to_hf.py b/examples/converters/convert_dcp_to_hf.py index d87d97a64e..6a9018cc20 100644 --- a/examples/converters/convert_dcp_to_hf.py +++ b/examples/converters/convert_dcp_to_hf.py @@ -57,12 +57,14 @@ def main(): # This is more stable than relying on the current NeMo-RL get_tokenizer() which can # change release to release. 
tokenizer_name_or_path = config["policy"]["model_name"] + hf_overrides = config["policy"].get("hf_overrides", {}) or {} hf_ckpt = convert_dcp_to_hf( dcp_ckpt_path=args.dcp_ckpt_path, hf_ckpt_path=args.hf_ckpt_path, model_name_or_path=model_name_or_path, tokenizer_name_or_path=tokenizer_name_or_path, + hf_overrides=hf_overrides, ) print(f"Saved HF checkpoint to: {hf_ckpt}") diff --git a/examples/converters/convert_megatron_to_hf.py b/examples/converters/convert_megatron_to_hf.py index ea4501286e..1a3bef0bee 100644 --- a/examples/converters/convert_megatron_to_hf.py +++ b/examples/converters/convert_megatron_to_hf.py @@ -18,6 +18,13 @@ from nemo_rl.models.megatron.community_import import export_model_from_megatron +""" NOTE: this script requires mcore. Make sure to launch with the mcore extra: +uv run --extra mcore python examples/converters/convert_megatron_to_hf.py \ + --config <path_to_ckpt>/config.yaml \ + --megatron-ckpt-path <path_to_ckpt>/policy/weights/iter_xxxxx \ + --hf-ckpt-path <path_to_save_hf_ckpt> +""" + def parse_args(): """Parse command line arguments.""" @@ -54,12 +61,14 @@ def main(): model_name = config["policy"]["model_name"] tokenizer_name = config["policy"]["tokenizer"]["name"] + hf_overrides = config["policy"].get("hf_overrides", {}) or {} export_model_from_megatron( hf_model_name=model_name, input_path=args.megatron_ckpt_path, output_path=args.hf_ckpt_path, hf_tokenizer_path=tokenizer_name, + hf_overrides=hf_overrides, ) diff --git a/examples/custom_parallel.py b/examples/custom_parallel.py index 647ddfc563..487ce008e6 100644 --- a/examples/custom_parallel.py +++ b/examples/custom_parallel.py @@ -26,3 +26,37 @@ "model.layers.*.mlp.down_proj": RowwiseParallel(), "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), } + +""" +Note on numerical stability: + +- Default plans that keep attention output proj and mlp downproj RowwiseParallel are numerically + unstable and tend to increase with larger TP (e.g., TP >= 4). 
+ +Enable this custom plan via: + +- policy.dtensor_cfg.custom_parallel_plan=examples.custom_parallel.qwen_model_tp_plan_stable + +Based on https://github.com/NVIDIA-NeMo/Automodel/blob/d79ccb94b0eca94a4c479313db2f9eee80db0139/nemo_automodel/components/distributed/optimized_tp_plans.py#L205-L217 +""" +qwen_model_tp_plan_stable = { + "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), + "model.embed_tokens": RowwiseParallel( + input_layouts=Replicate(), + ), + "model.layers.*.self_attn.q_proj": ColwiseParallel(), + "model.layers.*.self_attn.k_proj": ColwiseParallel(), + "model.layers.*.self_attn.v_proj": ColwiseParallel(), + "model.layers.*.self_attn.o_proj": ColwiseParallel( + input_layouts=Shard(-1), + output_layouts=Replicate(), + use_local_output=True, + ), + "model.layers.*.mlp.up_proj": ColwiseParallel(), + "model.layers.*.mlp.gate_proj": ColwiseParallel(), + "model.layers.*.mlp.down_proj": ColwiseParallel( + input_layouts=Shard(-1), + output_layouts=Replicate(), + use_local_output=True, + ), +} diff --git a/examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml b/examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml new file mode 100644 index 0000000000..9fba418322 --- /dev/null +++ b/examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml @@ -0,0 +1,273 @@ +grpo: + max_num_epochs: 1 + num_prompts_per_step: 64 + num_generations_per_prompt: 16 + max_rollout_turns: 1 # for multi-turn rollouts. Math Environments just have 1 turn (answering the question) + max_num_steps: 1000000 + normalize_rewards: true + use_leave_one_out_baseline: true + val_period: 10 + val_at_start: true + overlong_filtering: false + max_val_samples: null # inferred from size of val dataset. for multi evals, repeat val ds via `num_repeats` in `ng_prepare_data`. 
+ val_batch_size: null + seed: 42 + use_dynamic_sampling: false + dynamic_sampling_max_gen_batches: 10 + batch_multiplier: 1 + reward_shaping: + enabled: false + overlong_buffer_length: 128 + overlong_buffer_penalty: 1 + max_response_length: ${policy.max_total_sequence_length} + reward_scaling: + enabled: false + source_min: 0.0 + source_max: 1.0 + target_min: 0.0 + target_max: 1.0 + skip_reference_policy_logprobs_calculation: true + +loss_fn: + reference_policy_kl_penalty: 0 + reference_policy_kl_type: "k3" + kl_input_clamp_value: 20.0 + kl_output_clamp_value: 10.0 + ratio_clip_min: 0.2 + ratio_clip_max: 0.2 + ratio_clip_c: null + # (default off) loss formulation improvements (docs/guides/grpo.md#loss) + use_on_policy_kl_approximation: false + truncated_importance_sampling_ratio: null + use_importance_sampling_correction: false + token_level_loss: true + +checkpointing: + enabled: true + checkpoint_dir: "results/grpo" + metric_name: "val:accuracy" + higher_is_better: true + keep_top_k: 3 + save_period: 1 + checkpoint_must_save_by: null + +policy: + model_name: "Qwen/Qwen3-4B-Instruct-2507" + tokenizer: + name: ${policy.model_name} ## specify if you'd like to use a tokenizer different from the model's default + chat_template_kwargs: null # can be used to pass kwargs to the chat template, e.g., enable_thinking=true + hf_config_overrides: {} + train_global_batch_size: ${mul:${grpo.num_prompts_per_step}, ${grpo.num_generations_per_prompt}} # Match the total rollouts per step + train_micro_batch_size: 1 + logprob_batch_size: 1 + generation_batch_size: 32 # Only used when generating using HF backend + max_total_sequence_length: 32768 + precision: "bfloat16" + logprob_chunk_size: 1024 + + dtensor_cfg: + _v2: false + enabled: true + cpu_offload: False + sequence_parallel: false + activation_checkpointing: true + tensor_parallel_size: 2 + context_parallel_size: 1 + custom_parallel_plan: null + clear_cache_every_n_steps: null + + megatron_cfg: + enabled: false + # We might 
want to consider setting this value higher (e.g. to 1) and raising the vllm generation max mem utilization + empty_unused_memory_level: 0 + activation_checkpointing: true + converter_type: "Qwen2ForCausalLM" # Apparently this is comptible with Qwen 3 dense models. + tensor_model_parallel_size: 1 + expert_tensor_parallel_size: 1 + expert_model_parallel_size: 1 + pipeline_model_parallel_size: 1 + num_layers_in_first_pipeline_stage: null + num_layers_in_last_pipeline_stage: null + context_parallel_size: 1 + pipeline_dtype: ${policy.precision} + sequence_parallel: false + freeze_moe_router: true + moe_router_dtype: "fp64" + moe_router_load_balancing_type: "none" # "seq_aux_loss" causes logprob error divergence for grpo + moe_router_bias_update_rate: 0.0 # by default, disable bias updates for grpo + #gives ~20% training perf speedup with sequence packing + apply_rope_fusion: True + defer_fp32_logits: true + moe_permute_fusion: false + bias_activation_fusion: True + + optimizer: + optimizer: "adam" + lr: 5.0e-6 + min_lr: 5.0e-7 + weight_decay: 0.01 + bf16: true + fp16: false + params_dtype: "float32" + + #adam + adam_beta1: 0.9 + adam_beta2: 0.999 + adam_eps: 1e-8 + + #sgd + sgd_momentum: 0.9 + + #distributed optimizer + use_distributed_optimizer: true + use_precision_aware_optimizer: true + + # optimizer cpu offload + optimizer_cpu_offload: false + optimizer_offload_fraction: 0.0 + + clip_grad: ${policy.max_grad_norm} + + scheduler: + start_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + end_weight_decay: ${policy.megatron_cfg.optimizer.weight_decay} + weight_decay_incr_style: "constant" + lr_decay_style: "constant" + lr_decay_iters: null + lr_warmup_iters: 13 + lr_warmup_init: 5.0e-7 + + distributed_data_parallel_config: + grad_reduce_in_fp32: false + overlap_grad_reduce: true + overlap_param_gather: true + use_custom_fsdp: false + data_parallel_sharding_strategy: "optim_grads_params" + + env_vars: null + + # See 
docs/design-docs/sequence-packing-and-dynamic-batching.md + # for more details on dynamic batching and sequence packing. + dynamic_batching: + enabled: False + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + sequence_length_round: 64 + + sequence_packing: + enabled: false + train_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.train_micro_batch_size}} + logprob_mb_tokens: ${mul:${policy.max_total_sequence_length}, ${policy.logprob_batch_size}} + algorithm: "modified_first_fit_decreasing" + sequence_length_round: 64 + + # makes the training sequence length divisible by the tensor parallel size + # this is useful for sequence parallel training + make_sequence_length_divisible_by: ${policy.dtensor_cfg.tensor_parallel_size} + max_grad_norm: 1.0 + + optimizer: + name: "torch.optim.AdamW" + kwargs: + lr: 1.0e-6 + weight_decay: 0.01 + betas: [0.9, 0.999] + eps: 1e-8 + # when using Dtensor, we need to set foreach + # and fused to False + foreach: False + fused: False + + scheduler: + - name: "torch.optim.lr_scheduler.ConstantLR" + kwargs: + factor: 1.0 + total_iters: 10000000000 + - milestones: [] + + generation: + backend: "vllm" + max_new_tokens: ${policy.max_total_sequence_length} + temperature: 1.0 + top_p: 1.0 + top_k: null + stop_token_ids: null + stop_strings: null + vllm_cfg: + async_engine: true + precision: ${policy.precision} + tensor_parallel_size: 1 + pipeline_parallel_size: 1 + enable_expert_parallel: false + expert_parallel_size: 1 + gpu_memory_utilization: 0.8 + max_model_len: ${policy.max_total_sequence_length} + enforce_eager: false + use_deep_gemm: False + num_last_layers_in_bf16: 0 + num_first_layers_in_bf16: 0 + expose_http_server: true + skip_tokenizer_init: false + http_server_serving_chat_kwargs: + # This is the tool parser for Qwen 3 4B Instruct. This needs to be changed for other models. 
+ enable_auto_tools: true + tool_parser: hermes + # Enable the appropriate reasoning parser here. Since this model is an instruct model, we comment it out. + # reasoning_parser: deepseek_r1 + vllm_kwargs: + compilation_config: + # when enforce_eager is False, set ++policy.generation.vllm_kwargs.compilation_config.use_inductor=False for better accuracy, + # with the flag, vllm will use the custom CUDA kernels instead of the Triton kernels generated by torch.compile + # for more details, see convergence issue https://github.com/NVIDIA-NeMo/RL/issues/998 + use_inductor: False + colocated: + # true: generation shares training GPUs + # false: uses dedicated generation resources + enabled: true + # only relevant when enabled is false + resources: + gpus_per_node: null # Decides num gpus to be dedicated to generation when there is one node in the cluster i.e cluster.num_nodes == 1 + num_nodes: null # Decides number of nodes to be dedicated to generation + +data: + train_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/train.jsonl + validation_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/validation.jsonl + shuffle: true + num_workers: 0 + +env: + should_use_penguin: true + should_log_penguin_responses: true # If you have low logging storage, set this to false + penguin: # This is passed into Penguin as the initial_global_config_dict + config_paths: + - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! 
And it must be *for_training + - resources_servers/library_judge_math/configs/library_judge_math.yaml + library_judge_math: + resources_servers: + library_judge_math: + judge_model_server: + name: policy_model + should_use_judge: false + +logger: + log_dir: "logs" # Base directory for all logs + num_val_samples_to_print: 0 # Number of validation samples to pretty print on terminal + wandb_enabled: true + tensorboard_enabled: false + mlflow_enabled: false # Disable MLflow logging + swanlab_enabled: false + monitor_gpus: true # If true, will monitor GPU usage and log to wandb and/or tensorboard + wandb: + project: "grpo-dev" + name: "grpo-dev-logger" + tensorboard: {} + mlflow: + experiment_name: "grpo-dev" + run_name: "grpo-dev-logger" + gpu_monitoring: + collection_interval: 10 # How often to collect GPU usage metrics (in seconds) + flush_interval: 10 # How often to flush GPU usage metrics to the loggers (in seconds) + +cluster: + gpus_per_node: 8 + num_nodes: 8 diff --git a/examples/penguin/run_grpo_penguin.py b/examples/penguin/run_grpo_penguin.py new file mode 100644 index 0000000000..96d33e9528 --- /dev/null +++ b/examples/penguin/run_grpo_penguin.py @@ -0,0 +1,297 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import json +import os +import pprint +from itertools import chain, repeat +from typing import Optional + +# Increase the W&B single object size warning threshold. 
Initially 100_000 (100 KB) -> 10_000_000 (10 MB) +import wandb.util + +wandb.util.VALUE_BYTES_LIMIT = 10_000_000 + +import ray +from omegaconf import OmegaConf +from wandb import Table + +from nemo_rl.algorithms.grpo import ( + ColocatablePolicyInterface, + EnvironmentInterface, + GenerationInterface, + Logger, + MasterConfig, + StatefulDataLoader, + TokenizerType, + _should_use_penguin, + grpo_train, + refit_policy_generation, + setup, +) +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, +) +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.penguin import ( + Penguin, + PenguinConfig, + penguin_example_to_nemo_rl_datum_spec, + setup_penguin_config, +) +from nemo_rl.experience.rollouts import run_async_penguin_rollout +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + + +def parse_args() -> tuple[argparse.Namespace, list[str]]: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Run GRPO training with configuration") + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + + # Parse known args for the script + args, overrides = parser.parse_known_args() + + return args, overrides + + +def setup_single_penguin_dataset( + jsonl_fpath: str, tokenizer, num_repeats: Optional[int] = None +): + with open(jsonl_fpath) as f: + penguin_examples = list(map(json.loads, f)) + + print(f"Loaded data at {jsonl_fpath}. 
Found {len(penguin_examples)} examples") + + if num_repeats: + previous_length = len(penguin_examples) + penguin_examples = list( + chain.from_iterable( + repeat(penguin_example, num_repeats) + for penguin_example in penguin_examples + ) + ) + print( + f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(penguin_examples)}!" + ) + + nemo_rl_compatible_examples: list[DatumSpec] = [ + penguin_example_to_nemo_rl_datum_spec(penguin_example, idx) + for idx, penguin_example in enumerate(penguin_examples) + ] + + passthrough_task_processor = lambda datum_dict, *args, **kwargs: datum_dict + return AllTaskProcessedDataset( + nemo_rl_compatible_examples, + tokenizer, + None, + passthrough_task_processor, + ) + + +# These types are directly imported from grpo_train since if something about the architecture changes we want to immediately fail. +def collect_trajectories( + policy: ColocatablePolicyInterface, + policy_generation: GenerationInterface, + val_dataloader: StatefulDataLoader, + tokenizer: TokenizerType, + val_task_to_env: dict[str, EnvironmentInterface], + logger: Logger, + master_config: MasterConfig, +) -> None: + """Run trajectory collection.""" + # common config/state items + colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] + refit_policy_generation(policy, policy_generation, colocated_inference) + + log_filename = "trajectory_collection.jsonl" + + print("\n🔍 Running trajectory collection...", flush=True) + generation_config = master_config["policy"]["generation"] + for val_batch in val_dataloader: + penguin_rollout_result = run_async_penguin_rollout( + policy_generation=policy_generation, + input_batch=val_batch, + tokenizer=tokenizer, + task_to_env=val_task_to_env, + max_seq_len=None, + generation_config=generation_config, + max_rollout_turns=None, + greedy=False, + ) + + rows_to_log: list[str] = [] + for key, value in penguin_rollout_result.rollout_metrics.items(): + if 
"full_result" not in key: + continue + + value: Table + data: list[list[str]] = value.data # (n, 1) + rows_to_log.extend(v[0] for v in data) + + logger.log_string_list_as_jsonl(rows_to_log, log_filename) + + # TODO: eventually as trajectory collection use cases exceed 4 hours, we can leverage the dataloader save functionality to resume + # And also leverage the TimeoutChecker functionality as well + + policy_generation.finish_generation() + + +def main() -> None: + """Main entry point.""" + # Parse arguments + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join( + os.path.dirname(__file__), + "grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml", + ) + + config = load_config(args.config) + print(f"Loaded configuration from: {args.config}") + + if overrides: + print(f"Overrides: {overrides}") + config = parse_hydra_overrides(config, overrides) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + print("Applied CLI overrides") + + # Get the next experiment directory with incremented ID + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + print(f"📊 Using log directory: {config['logger']['log_dir']}") + if config["checkpointing"]["enabled"]: + print( + f"📊 Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}" + ) + + # setup tokenizer + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + assert config["policy"]["generation"] is not None, ( + "A generation config is required for GRPO" + ) + config["policy"]["generation"] = configure_generation_config( + config["policy"]["generation"], tokenizer + ) + + # Penguin specific config setup. + setup_penguin_config(config, tokenizer) + + # We assert here since this is right after the final config has been materialized. 
+ assert _should_use_penguin(config) + + print("\n▶ Setting up data...") + train_dataset = setup_single_penguin_dataset( + jsonl_fpath=config["data"]["train_jsonl_fpath"], + tokenizer=tokenizer, + ) + val_dataset = setup_single_penguin_dataset( + jsonl_fpath=config["data"]["validation_jsonl_fpath"], + tokenizer=tokenizer, + ) + + # Validation dataset config setup. + if config["grpo"]["max_val_samples"] is not None: + raise ValueError( + """A non-null `grpo.max_val_samples` parameter is not supported. + +Gym principle is that there is no hidden data pre or post processing from you. What you see is what you get. + +The validation set you pass in will directly be used for validation with no additional preprocessing. If you want to have some number of repetitions, please include that in your dataset, via ``num_repeats``, in your dataset config and `ng_prepare_data` will prepare it accordingly.""" + ) + + print( + f"Setting `grpo.max_val_samples` and `grpo.val_batch_size` to the length of the validation dataset, which is {len(val_dataset)}" + ) + config["grpo"]["max_val_samples"] = len(val_dataset) + config["grpo"]["val_batch_size"] = config["grpo"]["max_val_samples"] + + # Print config + print("Final config:") + pprint.pprint(config) + + init_ray() + + ( + policy, + policy_generation, + cluster, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + grpo_state, + master_config, + ) = setup(config, tokenizer, train_dataset, val_dataset) + + is_trajectory_collection = ( + config["env"]["penguin"].pop("is_trajectory_collection", False) or False + ) + penguin_config = PenguinConfig( + model_name=policy_generation.cfg["model_name"], + base_urls=policy_generation.dp_openai_server_base_urls, + initial_global_config_dict=config["env"]["penguin"], + ) + penguin = Penguin.options( + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.penguin.Penguin" + ), + } + ).remote(penguin_config) + # Blocking wait for penguin to spin up + 
ray.get(penguin.health_check.remote()) + task_to_env = {"penguin": penguin} + val_task_to_env = task_to_env + + if is_trajectory_collection: + collect_trajectories( + policy=policy, + policy_generation=policy_generation, + val_dataloader=val_dataloader, + tokenizer=tokenizer, + val_task_to_env=val_task_to_env, + logger=logger, + master_config=master_config, + ) + else: + grpo_train( + policy, + policy_generation, + dataloader, + val_dataloader, + tokenizer, + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + grpo_state, + master_config, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/penguin/run_penguin_single_node_sanity_tests.sh b/examples/penguin/run_penguin_single_node_sanity_tests.sh new file mode 100755 index 0000000000..1337cf3102 --- /dev/null +++ b/examples/penguin/run_penguin_single_node_sanity_tests.sh @@ -0,0 +1,33 @@ +# Fail on errors +set -e + +uv sync --group={build,docs,dev,test} --extra penguin + +# Stop pesky previous Ray servers that may have not been able to spin down from previous users. +uv run ray stop --force +uv run python -c "import ray; ray.shutdown()" + +# The first time I ran this, it took roughly 5 mins to setup the vLLM deps. +# This took me 2-3 mins to run this one test. +# NeMo RL test. This should pass no matter what the Gym setup is. +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_vllm_generate_text + +# NeMo Gym uses an OpenAI compatible endpoint under the hood. This tests the implementation for this server. +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_vllm_http_server + +# NeMo Gym communicates not using token ids, but in OpenAI schema. There are some edge cases we need to handle (e.g. token merging upon retokenization, multiple most efficient retokenizations, etc). 
+./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_VllmAsyncGenerationWorker_replace_prefix_tokens +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_replace_prefix_tokens_empty_model_prefix_returns_template +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_replace_prefix_tokens_missing_eos_in_template_prefix_raises +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_replace_prefix_tokens_tokenizer_without_eos_raises +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_replace_prefix_tokens_uses_last_eos_in_template_prefix +./tests/run_unit.sh unit/models/generation/test_vllm_generation.py::test_vllm_http_server_correct_merged_tokens_matches_baseline + +# NeMo RL test. This should pass no matter what the Gym setup is. +./tests/run_unit.sh unit/environments/test_math_environment.py::test_math_env_step_basic + +# NeMo Gym integrates directly into NeMo RL as an Environment since that is the cleanest way. This tests the NeMo Gym integration logic and correctness. +./tests/run_unit.sh unit/environments/test_penguin.py::test_penguin_sanity + +# NeMo Gym uses a separate rollout loop inside grpo_train in NeMo RL. This tests the e2e rollout functionality and correctness. +./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_penguin_rollout diff --git a/examples/prompts/clevr_cogent_cot.txt b/examples/prompts/clevr_cogent_cot.txt new file mode 100644 index 0000000000..0139bd3374 --- /dev/null +++ b/examples/prompts/clevr_cogent_cot.txt @@ -0,0 +1,5 @@ +Think step-by-step to solve the following problem, and answer in the following format: <think> step-by-step thought process </think> <answer> final answer </answer> +Note that your final answer must only contain a single numerical output (e.g. 2, 12, 45) for numerical solutions, and only the strings "yes" or "no" for yes/no type questions. 
+ +Let's think step-by-step: +{} diff --git a/examples/prompts/geo3k.txt b/examples/prompts/geo3k.txt new file mode 100644 index 0000000000..25e57bb46b --- /dev/null +++ b/examples/prompts/geo3k.txt @@ -0,0 +1,5 @@ +Think step-by-step to solve the following math problem, and answer in the following format: <think> step-by-step thought process </think> <answer> only answer here </answer>. +Note that your final answer must only contain a number or mathematical expression + +Let's think step-by-step: +{} diff --git a/examples/prompts/refcoco.txt b/examples/prompts/refcoco.txt new file mode 100644 index 0000000000..d6353a003b --- /dev/null +++ b/examples/prompts/refcoco.txt @@ -0,0 +1,5 @@ +Think step-by-step to solve the following problem, and answer in the following format: <think> step-by-step thought process </think> <answer> [x1, y1, x2, y2] </answer> +Your answer must contain a bounding box with the following format: [x1, y1, x2, y2] where x1, y1 are the top left coordinates, and x2, y2 are the bottom right coordinates. The coordinates should be normalized from [0, 1000]. + +Let's think step-by-step: +{} diff --git a/examples/run_distillation_math.py b/examples/run_distillation_math.py new file mode 100644 index 0000000000..60f0632706 --- /dev/null +++ b/examples/run_distillation_math.py @@ -0,0 +1,197 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +from collections import defaultdict +from typing import Any, Optional + +from omegaconf import OmegaConf +from transformers import PreTrainedTokenizerBase + +from nemo_rl.algorithms.distillation import MasterConfig, distillation_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset +from nemo_rl.data.interfaces import ( + TaskDataProcessFnCallable, + TaskDataSpec, +) +from nemo_rl.data.processors import math_hf_data_processor +from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, +) +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.environments.math_environment import MathEnvironment +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + + +def parse_args() -> tuple[argparse.Namespace, list[str]]: + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Run distillation training with configuration" + ) + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + + # Parse known args for the script + args, overrides = parser.parse_known_args() + + return args, overrides + + +# =============================================================================== +# Math Data Processor +# =============================================================================== +TokenizerType = PreTrainedTokenizerBase + + +def setup_data( + tokenizer: TokenizerType, + data_config: DataConfig, + env_configs: dict[str, Any], + seed: int, +) -> tuple[ + AllTaskProcessedDataset, + Optional[AllTaskProcessedDataset], + dict[str, 
EnvironmentInterface], + dict[str, EnvironmentInterface], +]: + print("\n▶ Setting up data...") + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + + # load dataset + data: Any = load_response_dataset(data_config, seed) + + # data processor + task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( + defaultdict(lambda: (math_task_spec, math_hf_data_processor)) + ) + task_data_processors["math"] = (math_task_spec, math_hf_data_processor) + + # setup math environment + math_env = MathEnvironment.options( # type: ignore # it's wrapped with ray.remote + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.math_environment.MathEnvironment" + ), + "env_vars": dict(os.environ), # Pass thru all user environment variables + } + ).remote(env_configs["math"]) + + dataset = AllTaskProcessedDataset( + data.formatted_ds["train"], + tokenizer, + math_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + + val_dataset: Optional[AllTaskProcessedDataset] = None + if data.formatted_ds["validation"]: + val_dataset = AllTaskProcessedDataset( + data.formatted_ds["validation"], + tokenizer, + math_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + else: + val_dataset = None + + task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: math_env) + task_to_env["math"] = math_env + return dataset, val_dataset, task_to_env, task_to_env + + +def main() -> None: + """Main entry point.""" + # Parse arguments + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join( + os.path.dirname(__file__), "configs", "distillation_math.yaml" + ) + + config = load_config(args.config) + if overrides: + config = parse_hydra_overrides(config, overrides) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + + # Get 
the next experiment directory with incremented ID + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + + init_ray() + + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + + if config["policy"]["generation"] is not None: + config["policy"]["generation"] = configure_generation_config( + config["policy"]["generation"], tokenizer + ) + else: + print(" ⚠️ No generation config found, this may cause issues") + + # setup data + ( + dataset, + val_dataset, + task_to_env, + val_task_to_env, + ) = setup_data(tokenizer, config["data"], config["env"], 42) + + ( + student_policy, + teacher_policy, + student_generation, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + distillation_state, + master_config, + ) = setup(config, tokenizer, dataset, val_dataset) + + distillation_train( + student_policy, + teacher_policy, + student_generation, + dataloader, + val_dataloader, + tokenizer, # pass tokenizer parameter + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + distillation_state, + master_config, + ) + + +if __name__ == "__main__": + main() diff --git a/examples/run_dpo.py b/examples/run_dpo.py index a9702ed93e..fb84a02fb6 100644 --- a/examples/run_dpo.py +++ b/examples/run_dpo.py @@ -19,15 +19,16 @@ from typing import Any from omegaconf import OmegaConf +from transformers import AutoTokenizer from nemo_rl.algorithms.dpo import MasterConfig, dpo_train, setup from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.data import DataConfig, hf_datasets -from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_preference_dataset +from nemo_rl.data.datasets.preference_datasets import PreferenceDataset from nemo_rl.data.interfaces import DatumSpec, TaskDataSpec from nemo_rl.data.llm_message_utils import get_formatted_message_log from nemo_rl.distributed.virtual_cluster import init_ray -from 
nemo_rl.models.policy import PolicyConfig from nemo_rl.utils.config import load_config, parse_hydra_overrides from nemo_rl.utils.logger import get_next_experiment_dir @@ -69,9 +70,11 @@ def dpo_preprocessor( >>> task_spec = TaskDataSpec(task_name="test_dpo") >>> >>> datum = { - ... "prompt": "What is 2+2?", - ... "chosen_response": "4", - ... "rejected_response": "5" + ... "context": [{"role": "user", "content": "What is 2+2?"}], + ... "completions": [ + ... {"rank": 0, "completion": [{"role": "assistant", "content": "4"}]}, + ... {"rank": 1, "completion": [{"role": "assistant", "content": "5"}]} + ... ] ... } >>> >>> processed = dpo_preprocessor(datum, task_spec, tokenizer, max_seq_length=128, idx=0) @@ -84,11 +87,13 @@ def dpo_preprocessor( >>> processed["message_log_rejected"][-1]["content"] '5<|eot_id|>' >>> - >>> # prompt can also be a list with multiple messages + >>> # context can also contain multiple turns >>> datum = { - ... "prompt": [{"role": "user", "content": "I have a question."}, {"role": "assistant", "content": "Sure!"}, {"role": "user", "content": "What is 2+2?"}], - ... "chosen_response": "4", - ... "rejected_response": "5" + ... "context": [{"role": "user", "content": "I have a question."}, {"role": "assistant", "content": "Sure!"}, {"role": "user", "content": "What is 2+2?"}], + ... "completions": [ + ... {"rank": 0, "completion": [{"role": "assistant", "content": "4"}]}, + ... {"rank": 1, "completion": [{"role": "assistant", "content": "5"}]} + ... ] ... 
} >>> processed = dpo_preprocessor(datum, task_spec, tokenizer, max_seq_length=128, idx=0) >>> len(processed["message_log_chosen"]) @@ -102,36 +107,23 @@ def dpo_preprocessor( ``` """ - if isinstance(datum_dict["prompt"], list): - messages_chosen = datum_dict["prompt"].copy() - messages_rejected = datum_dict["prompt"].copy() - else: - messages_chosen = [ - { - "role": "user", - "content": datum_dict["prompt"], - }, - ] - messages_rejected = [ - { - "role": "user", - "content": datum_dict["prompt"], - }, - ] - - messages_chosen.append( - { - "role": "assistant", - "content": datum_dict["chosen_response"], - }, + assert len(datum_dict["completions"]) == 2, ( + "DPO training supports only two completions" ) + # Lower rank is preferred + if datum_dict["completions"][0]["rank"] < datum_dict["completions"][1]["rank"]: + chosen_completion = datum_dict["completions"][0] + rejected_completion = datum_dict["completions"][1] + elif datum_dict["completions"][0]["rank"] > datum_dict["completions"][1]["rank"]: + chosen_completion = datum_dict["completions"][1] + rejected_completion = datum_dict["completions"][0] + else: + raise NotImplementedError( + "Ties are not supported yet. You can use the following command to filter out ties: `cat <PathToPreferenceDataset> | jq 'select(.completions[0].rank != .completions[1].rank)'`." 
+ ) - messages_rejected.append( - { - "role": "assistant", - "content": datum_dict["rejected_response"], - }, - ) + messages_chosen = datum_dict["context"] + chosen_completion["completion"] + messages_rejected = datum_dict["context"] + rejected_completion["completion"] message_log_chosen = get_formatted_message_log( messages_chosen, tokenizer, task_data_spec @@ -171,22 +163,20 @@ def dpo_preprocessor( return output -def setup_data(data_config: DataConfig, policy_config: PolicyConfig): +def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): print("\n▶ Setting up data...") - if data_config["dataset_name"] == "HelpSteer3": - data = hf_datasets.HelpSteer3Dataset() - else: - data = hf_datasets.DPODataset( - train_data_path=data_config["train_data_path"], - val_data_path=data_config["val_data_path"], - ) + # load dataset + data = load_preference_dataset(data_config) train_dataset = data.formatted_ds["train"] val_dataset = data.formatted_ds["validation"] + print(f" ✓ Training dataset loaded with {len(train_dataset)} samples.") + if val_dataset: + print(f" ✓ Validation dataset loaded with {len(val_dataset)} samples.") + dpo_task_spec = data.task_spec - tokenizer = get_tokenizer(policy_config["tokenizer"]) train_dataset = AllTaskProcessedDataset( train_dataset, tokenizer, @@ -195,15 +185,44 @@ def setup_data(data_config: DataConfig, policy_config: PolicyConfig): max_seq_length=data_config["max_input_seq_length"], ) - val_dataset = AllTaskProcessedDataset( - val_dataset, - tokenizer, - dpo_task_spec, - dpo_preprocessor, - max_seq_length=data_config["max_input_seq_length"], - ) + # TODO @yukih: unify the code when support multiple datasets for other algorithms + if "val_data_paths" in data_config and data_config["val_data_paths"]: + val_dataset = {} - return train_dataset, val_dataset, tokenizer, dpo_task_spec + assert isinstance(data_config["val_data_paths"], dict), ( + f"Invalid type for val_data_paths: {type(data_config['val_data_paths'])}. 
val_data_paths must be a dictionary." + ) + val_data_paths = data_config["val_data_paths"] + + for val_dataset_name, val_dataset_path in val_data_paths.items(): + assert val_dataset_name not in val_dataset + val_data = PreferenceDataset(val_dataset_path) + print( + f" ✓ Validation dataset '{val_dataset_name}' loaded with {len(val_data.formatted_ds['train'])} samples." + ) + val_dataset[val_dataset_name] = AllTaskProcessedDataset( + val_data.formatted_ds["train"], + tokenizer, + val_data.task_spec, + dpo_preprocessor, + max_seq_length=data_config["max_input_seq_length"], + ) + else: + val_dataset = ( + { + "default": AllTaskProcessedDataset( + val_dataset, + tokenizer, + dpo_task_spec, + dpo_preprocessor, + max_seq_length=data_config["max_input_seq_length"], + ) + } + if val_dataset + else {} + ) + + return train_dataset, val_dataset, dpo_task_spec def main(): @@ -236,10 +255,16 @@ def main(): init_ray() + # setup tokenizer + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + # setup data - train_dataset, val_dataset, tokenizer, dpo_task_spec = setup_data( - config["data"], config["policy"] - ) + ( + train_dataset, + val_dataset, + dpo_task_spec, + ) = setup_data(tokenizer, config["data"]) + ( policy, cluster, @@ -251,6 +276,7 @@ def main(): dpo_save_state, master_config, ) = setup(config, tokenizer, train_dataset, val_dataset) + dpo_train( policy, train_dataloader, diff --git a/examples/run_eval.py b/examples/run_eval.py index 89e2ede395..8966938632 100644 --- a/examples/run_eval.py +++ b/examples/run_eval.py @@ -23,11 +23,8 @@ from transformers import AutoTokenizer, PreTrainedTokenizerBase from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.data.datasets import AllTaskProcessedDataset -from nemo_rl.data.eval_datasets import load_eval_dataset -from nemo_rl.distributed.ray_actor_environment_registry import ( - get_actor_python_env, -) +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_eval_dataset +from 
nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env from nemo_rl.distributed.virtual_cluster import init_ray from nemo_rl.environments.math_environment import MathEnvironment from nemo_rl.evals.eval import MasterConfig, run_env_eval, setup diff --git a/examples/run_grpo_math.py b/examples/run_grpo_math.py index 673322eb61..01a2ea8d79 100644 --- a/examples/run_grpo_math.py +++ b/examples/run_grpo_math.py @@ -24,15 +24,12 @@ from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.data import DataConfig -from nemo_rl.data.datasets import AllTaskProcessedDataset -from nemo_rl.data.hf_datasets.deepscaler import DeepScalerDataset -from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset from nemo_rl.data.interfaces import ( - DatumSpec, - LLMMessageLogType, TaskDataProcessFnCallable, TaskDataSpec, ) +from nemo_rl.data.processors import math_hf_data_processor from nemo_rl.distributed.ray_actor_environment_registry import ( get_actor_python_env, ) @@ -65,60 +62,11 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]: TokenizerType = PreTrainedTokenizerBase -# TaskDataProcessFnCallable -def hf_data_processor( - datum_dict: dict[str, Any], - task_data_spec: TaskDataSpec, - tokenizer: TokenizerType, - max_seq_length: int, - idx: int, -) -> DatumSpec: - """Process a datum dictionary (directly loaded from data/hf_datasets/openmathinstruct2.py) into a DatumSpec for the Math Environment.""" - user_message = datum_dict["messages"] - problem = user_message[0]["content"] - extra_env_info = {"ground_truth": user_message[1]["content"]} - - message_log: LLMMessageLogType = [] - user_message = { - "role": "user", - "content": task_data_spec.prompt.format(problem), - } - message: list[str] = tokenizer.apply_chat_template( # type: ignore - [user_message], - tokenize=False, - 
add_generation_prompt=True, - add_special_tokens=False, - ) - user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] - user_message["content"] = message[0] - message_log.append(user_message) - - length = sum(len(m["token_ids"]) for m in message_log) - - loss_multiplier = 1.0 - if length > max_seq_length: - # make smaller and mask out - for chat_message in message_log: - chat_message["token_ids"] = chat_message["token_ids"][ - : min(4, max_seq_length // len(message_log)) - ] - loss_multiplier = 0.0 - - output: DatumSpec = { - "message_log": message_log, - "length": length, - "extra_env_info": extra_env_info, - "loss_multiplier": loss_multiplier, - "idx": idx, - "task_name": datum_dict["task_name"], - } - return output - - def setup_data( tokenizer: TokenizerType, data_config: DataConfig, env_configs: dict[str, Any], + seed: int, ) -> tuple[ AllTaskProcessedDataset, Optional[AllTaskProcessedDataset], @@ -132,23 +80,16 @@ def setup_data( system_prompt_file=data_config["system_prompt_file"], ) - # Load OpenMathInstruct2Dataset using nemo rl datasets - if data_config["dataset_name"] == "OpenMathInstruct-2": - print("Loading nvidia/OpenMathInstruct2Dataset for training and validation") - data: Any = OpenMathInstruct2Dataset() - elif data_config["dataset_name"] == "DeepScaler": - print( - "Loading agentica-org/DeepScaleR-Preview-Dataset for training and validation" - ) - data: Any = DeepScalerDataset() - else: - raise ValueError(f"No processor for dataset {data_config['dataset_name']}.") + # load dataset + data: Any = load_response_dataset(data_config, seed) + # data processor task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( - defaultdict(lambda: (math_task_spec, hf_data_processor)) + defaultdict(lambda: (math_task_spec, math_hf_data_processor)) ) - task_data_processors["math"] = (math_task_spec, hf_data_processor) + task_data_processors["math"] = (math_task_spec, math_hf_data_processor) + # setup math 
environment math_env = MathEnvironment.options( # type: ignore # it's wrapped with ray.remote runtime_env={ "py_executable": get_actor_python_env( @@ -157,6 +98,7 @@ def setup_data( "env_vars": dict(os.environ), # Pass thru all user environment variables } ).remote(env_configs["math"]) + dataset = AllTaskProcessedDataset( data.formatted_ds["train"], tokenizer, @@ -231,7 +173,7 @@ def main() -> None: val_dataset, task_to_env, val_task_to_env, - ) = setup_data(tokenizer, config["data"], config["env"]) + ) = setup_data(tokenizer, config["data"], config["env"], config["grpo"]["seed"]) ( policy, @@ -246,20 +188,69 @@ def main() -> None: master_config, ) = setup(config, tokenizer, dataset, val_dataset) - grpo_train( - policy, - policy_generation, - dataloader, - val_dataloader, - tokenizer, - loss_fn, - task_to_env, - val_task_to_env, - logger, - checkpointer, - grpo_state, - master_config, - ) + # Check if async mode is enabled + if "async_grpo" in config["grpo"] and config["grpo"]["async_grpo"]["enabled"]: + # Async GRPO does not support dynamic sampling, reward scaling, or reward shaping (DAPO features) + unsupported_features = [ + "use_dynamic_sampling", + "reward_scaling", + "reward_shaping", + ] + + for feature in unsupported_features: + if feature not in config["grpo"]: + continue + + if feature == "use_dynamic_sampling": + if config["grpo"][feature]: + raise NotImplementedError( + f"{feature} is not supported with async GRPO" + ) + else: + if config["grpo"][feature]["enabled"]: + raise NotImplementedError( + f"{feature} is not supported with async GRPO" + ) + + from nemo_rl.algorithms.grpo import async_grpo_train + + print("🚀 Running async GRPO training") + + async_config = config["grpo"]["async_grpo"] + # Run async GRPO training + async_grpo_train( + policy=policy, + policy_generation=policy_generation, + dataloader=dataloader, + val_dataloader=val_dataloader, + tokenizer=tokenizer, + loss_fn=loss_fn, + task_to_env=task_to_env, + val_task_to_env=val_task_to_env, 
+ logger=logger, + checkpointer=checkpointer, + grpo_save_state=grpo_state, + master_config=master_config, + max_trajectory_age_steps=async_config["max_trajectory_age_steps"], + ) + else: + print("🚀 Running synchronous GRPO training") + + # Run standard GRPO training + grpo_train( + policy, + policy_generation, + dataloader, + val_dataloader, + tokenizer, + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + grpo_state, + master_config, + ) if __name__ == "__main__": diff --git a/examples/run_grpo_rm.py b/examples/run_grpo_rm.py new file mode 100644 index 0000000000..b0be8ed9a4 --- /dev/null +++ b/examples/run_grpo_rm.py @@ -0,0 +1,216 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse +import os +import pprint +from collections import defaultdict +from typing import Any, Optional + +from omegaconf import OmegaConf +from transformers import PreTrainedTokenizerBase + +from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset +from nemo_rl.data.interfaces import ( + TaskDataProcessFnCallable, + TaskDataSpec, +) +from nemo_rl.data.processors import math_hf_data_processor +from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.environments.reward_model_environment import RewardModelEnvironment +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + + +def parse_args() -> tuple[argparse.Namespace, list[str]]: + """Parse command line arguments. 
+ + Returns: + Tuple of (parsed_args, overrides) where: + - parsed_args: Namespace object containing parsed arguments + - overrides: List of remaining unparsed arguments (Hydra overrides) + """ + parser = argparse.ArgumentParser(description="Run GRPO training with configuration") + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + + # Parse known args for the script + args, overrides = parser.parse_known_args() + + return args, overrides + + +# =============================================================================== +# Math Data Processor +# =============================================================================== +TokenizerType = PreTrainedTokenizerBase + + +def setup_data( + tokenizer: TokenizerType, + data_config: DataConfig, + env_configs: dict[str, Any], + seed: int, +) -> tuple[ + AllTaskProcessedDataset, + Optional[AllTaskProcessedDataset], + dict[str, EnvironmentInterface], + dict[str, EnvironmentInterface], +]: + print("\n▶ Setting up data...") + task_name = "math" + reward_model_task_spec = TaskDataSpec( + task_name=task_name, + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + + # load dataset + data: Any = load_response_dataset(data_config, seed) + + # data processor + task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( + defaultdict(lambda: (reward_model_task_spec, math_hf_data_processor)) + ) + task_data_processors[task_name] = (reward_model_task_spec, math_hf_data_processor) + + reward_model_env = RewardModelEnvironment.options( # type: ignore # it's wrapped with ray.remote + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.reward_model_environment.RewardModelEnvironment" + ), + "env_vars": dict(os.environ), # Pass thru all user environment variables + } + ).remote(env_configs["reward_model"]) + + dataset = AllTaskProcessedDataset( + data.formatted_ds["train"], + tokenizer, + 
reward_model_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + + val_dataset: Optional[AllTaskProcessedDataset] = None + if data.formatted_ds["validation"]: + val_dataset = AllTaskProcessedDataset( + data.formatted_ds["validation"], + tokenizer, + reward_model_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + else: + val_dataset = None + + task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: reward_model_env) + task_to_env[task_name] = reward_model_env + return dataset, val_dataset, task_to_env, task_to_env + + +def main() -> None: + """Main entry point.""" + # Parse arguments + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join( + os.path.dirname(__file__), "configs", "grpo_rm_1B.yaml" + ) + + config = load_config(args.config) + print(f"Loaded configuration from: {args.config}") + + if overrides: + print(f"Overrides: {overrides}") + config = parse_hydra_overrides(config, overrides) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + print("Applied CLI overrides") + + # Print config + print("Final config:") + pprint.pprint(config) + + # Get the next experiment directory with incremented ID + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + print(f"📊 Using log directory: {config['logger']['log_dir']}") + if config["checkpointing"]["enabled"]: + print( + f"📊 Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}" + ) + + init_ray() + + # setup tokenizer + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + assert config["policy"]["generation"] is not None, ( + "A generation config is required for GRPO" + ) + config["policy"]["generation"] = configure_generation_config( + config["policy"]["generation"], tokenizer + ) + + # setup data + ( + dataset, + val_dataset, + task_to_env, + val_task_to_env, + ) = setup_data(tokenizer, config["data"], 
config["env"], config["grpo"]["seed"]) + + ( + policy, + policy_generation, + cluster, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + grpo_state, + master_config, + ) = setup(config, tokenizer, dataset, val_dataset) + + grpo_train( + policy, + policy_generation, + dataloader, + val_dataloader, + tokenizer, + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + grpo_state, + master_config, + ) + + for task_name in val_task_to_env.keys(): + env = val_task_to_env[task_name] + env.shutdown.remote() + + +if __name__ == "__main__": + main() diff --git a/examples/run_grpo_sliding_puzzle.py b/examples/run_grpo_sliding_puzzle.py index c5ccc65524..ca2359d0d2 100644 --- a/examples/run_grpo_sliding_puzzle.py +++ b/examples/run_grpo_sliding_puzzle.py @@ -24,7 +24,7 @@ from transformers import AutoTokenizer from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup -from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.algorithms.utils import get_tokenizer, set_seed from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType from nemo_rl.distributed.virtual_cluster import init_ray from nemo_rl.environments.games.sliding_puzzle import ( @@ -223,6 +223,8 @@ def main(): init_ray() + set_seed(config["grpo"]["seed"]) + # setup tokenizer tokenizer = get_tokenizer(config["policy"]["tokenizer"]) config["policy"]["generation"] = configure_generation_config( diff --git a/examples/run_rm.py b/examples/run_rm.py new file mode 100644 index 0000000000..64b23fc27d --- /dev/null +++ b/examples/run_rm.py @@ -0,0 +1,254 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +import os +import pprint +from typing import Any + +from omegaconf import OmegaConf +from transformers import AutoTokenizer + +from nemo_rl.algorithms.rm import MasterConfig, rm_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_preference_dataset +from nemo_rl.data.datasets.preference_datasets import PreferenceDataset +from nemo_rl.data.interfaces import DatumSpec, TaskDataSpec +from nemo_rl.data.llm_message_utils import get_formatted_message_log +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Run RM training with configuration") + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + + # Parse known args for the script + args, overrides = parser.parse_known_args() + + return args, overrides + + +# ======================================================= +# Data Processing +# ======================================================= +def rm_preprocessor( + datum_dict: dict[str, Any], + task_data_spec: TaskDataSpec, + tokenizer, + max_seq_length: int, + idx: int, +) -> DatumSpec: + """Process a datum dictionary for RM training.""" + assert len(datum_dict["completions"]) == 2, ( + "RM training supports only two completions" + ) 
+ # Lower rank is preferred + if datum_dict["completions"][0]["rank"] < datum_dict["completions"][1]["rank"]: + chosen_completion = datum_dict["completions"][0] + rejected_completion = datum_dict["completions"][1] + elif datum_dict["completions"][0]["rank"] > datum_dict["completions"][1]["rank"]: + chosen_completion = datum_dict["completions"][1] + rejected_completion = datum_dict["completions"][0] + else: + raise NotImplementedError( + "Ties are not supported yet. You can use the following command to filter out ties: `cat <PathToPreferenceDataset> | jq 'select(.completions[0].rank != .completions[1].rank)'`." + ) + + messages_chosen = datum_dict["context"] + chosen_completion["completion"] + messages_rejected = datum_dict["context"] + rejected_completion["completion"] + + message_log_chosen = get_formatted_message_log( + messages_chosen, tokenizer, task_data_spec + ) + message_log_rejected = get_formatted_message_log( + messages_rejected, tokenizer, task_data_spec + ) + + length_chosen = sum(len(m["token_ids"]) for m in message_log_chosen) + length_rejected = sum(len(m["token_ids"]) for m in message_log_rejected) + + loss_multiplier = 1.0 + if max(length_chosen, length_rejected) > max_seq_length: + # make smaller and mask out + logging.warning( + f"Truncating chosen and rejected messages to {max_seq_length} tokens" + ) + for message in message_log_chosen: + message["token_ids"] = message["token_ids"][ + : min(4, max_seq_length // len(message_log_chosen)) + ] + for message in message_log_rejected: + message["token_ids"] = message["token_ids"][ + : min(4, max_seq_length // len(message_log_rejected)) + ] + loss_multiplier = 0.0 + + length_chosen = sum(len(m["token_ids"]) for m in message_log_chosen) + length_rejected = sum(len(m["token_ids"]) for m in message_log_rejected) + + # safeguard against edge case where there are too many turns to fit within the max length + assert max(length_chosen, length_rejected) <= max_seq_length + + output = { + "message_log_chosen": 
message_log_chosen, + "length_chosen": length_chosen, + "message_log_rejected": message_log_rejected, + "length_rejected": length_rejected, + "extra_env_info": None, + "loss_multiplier": loss_multiplier, + "idx": idx, + } + return output + + +def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): + print("\n▶ Setting up data...") + + # load dataset + data = load_preference_dataset(data_config) + train_dataset = data.formatted_ds["train"] + val_dataset = data.formatted_ds["validation"] + + print(f" ✓ Training dataset loaded with {len(train_dataset)} samples.") + if val_dataset: + print(f" ✓ Validation dataset loaded with {len(val_dataset)} samples.") + + rm_task_spec = data.task_spec + + train_dataset = AllTaskProcessedDataset( + train_dataset, + tokenizer, + rm_task_spec, + rm_preprocessor, + max_seq_length=data_config["max_input_seq_length"], + ) + + # TODO @yukih: unify the code when support multiple datasets for other algorithms + if "val_data_paths" in data_config and data_config["val_data_paths"]: + val_dataset = {} + + assert isinstance(data_config["val_data_paths"], dict), ( + f"Invalid type for val_data_paths: {type(data_config['val_data_paths'])}. val_data_paths must be a dictionary." + ) + val_data_paths = data_config["val_data_paths"] + + for val_dataset_name, val_dataset_path in val_data_paths.items(): + assert val_dataset_name not in val_dataset + val_data = PreferenceDataset(val_dataset_path) + print( + f" ✓ Validation dataset '{val_dataset_name}' loaded with {len(val_data.formatted_ds['train'])} samples." 
+ ) + val_dataset[val_dataset_name] = AllTaskProcessedDataset( + val_data.formatted_ds["train"], + tokenizer, + val_data.task_spec, + rm_preprocessor, + max_seq_length=data_config["max_input_seq_length"], + ) + else: + val_dataset = ( + { + "default": AllTaskProcessedDataset( + val_dataset, + tokenizer, + rm_task_spec, + rm_preprocessor, + max_seq_length=data_config["max_input_seq_length"], + ) + } + if val_dataset + else {} + ) + + return train_dataset, val_dataset, rm_task_spec + + +def main(): + """Main entry point.""" + # Parse arguments + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join(os.path.dirname(__file__), "configs", "rm.yaml") + + config = load_config(args.config) + print(f"Loaded configuration from: {args.config}") + + if overrides: + print(f"Overrides: {overrides}") + config = parse_hydra_overrides(config, overrides) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + print("Applied CLI overrides") + + # Print config + print("Final config:") + pprint.pprint(config) + + assert config["policy"]["reward_model_cfg"]["enabled"] + + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + print(f"📊 Using log directory: {config['logger']['log_dir']}") + if config["checkpointing"]["enabled"]: + print( + f"📊 Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}" + ) + + init_ray() + + # setup tokenizer + tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + + # setup data + ( + dataset, + val_dataset, + rm_task_spec, + ) = setup_data(tokenizer, config["data"]) + + ( + policy, + cluster, + train_dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + rm_save_state, + master_config, + ) = setup(config, tokenizer, dataset, val_dataset) + + rm_train( + policy, + train_dataloader, + val_dataloader, + tokenizer, + loss_fn, + master_config, + logger, + rm_task_spec, + checkpointer, + rm_save_state, + ) + + +if __name__ == "__main__": + main() diff 
--git a/examples/run_sft.py b/examples/run_sft.py index df0d7ce3f7..b804b4e19f 100644 --- a/examples/run_sft.py +++ b/examples/run_sft.py @@ -16,15 +16,15 @@ import os import pprint from functools import partial -from typing import Any +from typing import Any, Callable, Optional from omegaconf import OmegaConf from transformers import AutoTokenizer from nemo_rl.algorithms.sft import MasterConfig, setup, sft_train from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.data import DataConfig, hf_datasets -from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset from nemo_rl.data.interfaces import DatumSpec, TaskDataSpec from nemo_rl.data.llm_message_utils import get_formatted_message_log from nemo_rl.distributed.virtual_cluster import init_ray @@ -59,8 +59,13 @@ def sft_preprocessor( add_bos: bool = True, add_eos: bool = True, add_generation_prompt: bool = False, + datum_preprocessor: Optional[Callable] = None, ) -> DatumSpec: """Process a datum dictionary for SFT training.""" + # optional preprocessor + if datum_preprocessor is not None: + datum_dict = datum_preprocessor(datum_dict) + message_log = get_formatted_message_log( datum_dict["messages"], tokenizer, @@ -68,6 +73,7 @@ def sft_preprocessor( add_bos_token=add_bos, add_eos_token=add_eos, add_generation_prompt=add_generation_prompt, + tools=datum_dict.get("tools", None), # Pass tools from data if present ) length = sum(len(m["token_ids"]) for m in message_log) @@ -91,43 +97,26 @@ def sft_preprocessor( return output -def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): +def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig, seed: int): print("\n▶ Setting up data...") - data_cls = data_config["dataset_name"] - if data_cls == "open_assistant": - data = hf_datasets.OasstDataset(output_dir="/tmp/open_assistant") - elif data_cls == "squad": - data = 
hf_datasets.SquadDataset() - elif data_cls == "prompt_response_dataset": - data = hf_datasets.PromptResponseDataset( - data_config["train_data_path"], - data_config["val_data_path"], - data_config["input_key"], - data_config["output_key"], - ) - elif data_cls == "openmathinstruct2": - data = hf_datasets.OpenMathInstruct2Dataset( - split=data_config["split"], - output_key=data_config["output_key"], - prompt_file=data_config["prompt_file"], - ) - elif data_cls == "openai_format": - data = hf_datasets.OpenAIFormatDataset( - data_config["train_data_path"], - data_config["val_data_path"], - data_config["chat_key"], - data_config["system_key"], - data_config["system_prompt"], - ) - else: - raise ValueError(f"Unknown dataset class: {data_cls}") - print( - f" ✓ Training and validation datasets loaded with {len(data.formatted_ds['train'])} and {len(data.formatted_ds['validation'])} samples, respectively." - ) + # load dataset + data = load_response_dataset(data_config, seed) train_dataset = data.formatted_ds["train"] val_dataset = data.formatted_ds["validation"] sft_task_spec = data.task_spec + print( + f" ✓ Training and validation datasets loaded with {len(train_dataset)} and {len(val_dataset)} samples, respectively." 
+ ) + + # add preprocessor if needed + datum_preprocessor = None + if "dataset_name" in data_config and data_config["dataset_name"] == "clevr_cogent": + from nemo_rl.data.datasets.response_datasets.clevr import ( + format_clevr_cogent_dataset, + ) + + datum_preprocessor = partial(format_clevr_cogent_dataset, return_pil=True) train_dataset = AllTaskProcessedDataset( train_dataset, @@ -138,6 +127,7 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): add_bos=data_config["add_bos"], add_eos=data_config["add_eos"], add_generation_prompt=data_config["add_generation_prompt"], + datum_preprocessor=datum_preprocessor, ), max_seq_length=data_config["max_input_seq_length"], ) @@ -151,6 +141,7 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): add_bos=data_config.get("add_bos", True), add_eos=data_config.get("add_eos", True), add_generation_prompt=data_config["add_generation_prompt"], + datum_preprocessor=datum_preprocessor, ), max_seq_length=data_config["max_input_seq_length"], ) @@ -158,7 +149,7 @@ def setup_data(tokenizer: AutoTokenizer, data_config: DataConfig): return train_dataset, val_dataset, sft_task_spec -def main(): +def main(is_vlm: bool = False): """Main entry point.""" # Parse arguments args, overrides = parse_args() @@ -189,15 +180,15 @@ def main(): init_ray() - # setup tokenizer - tokenizer = get_tokenizer(config["policy"]["tokenizer"]) + # setup tokenizer (or processor) + tokenizer = get_tokenizer(config["policy"]["tokenizer"], get_processor=is_vlm) # setup data ( dataset, val_dataset, sft_task_spec, - ) = setup_data(tokenizer, config["data"]) + ) = setup_data(tokenizer, config["data"], config["sft"]["seed"]) ( policy, @@ -210,6 +201,7 @@ def main(): sft_save_state, master_config, ) = setup(config, tokenizer, dataset, val_dataset) + sft_train( policy, train_dataloader, diff --git a/examples/run_vlm_grpo.py b/examples/run_vlm_grpo.py new file mode 100644 index 0000000000..8dd3974ea3 --- /dev/null +++ 
b/examples/run_vlm_grpo.py @@ -0,0 +1,391 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import base64 +import os +import pprint +from collections import defaultdict +from io import BytesIO +from typing import Any, Optional + +import requests +from omegaconf import OmegaConf +from PIL import Image +from transformers import AutoProcessor + +from nemo_rl.algorithms.grpo import MasterConfig, grpo_train, setup +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data import DataConfig +from nemo_rl.data.datasets import AllTaskProcessedDataset, load_response_dataset +from nemo_rl.data.datasets.response_datasets.clevr import format_clevr_cogent_dataset +from nemo_rl.data.datasets.response_datasets.geometry3k import format_geometry3k_dataset +from nemo_rl.data.datasets.response_datasets.refcoco import format_refcoco_dataset +from nemo_rl.data.interfaces import ( + DatumSpec, + LLMMessageLogType, + TaskDataProcessFnCallable, + TaskDataSpec, +) +from nemo_rl.data.multimodal_utils import ( + PackedTensor, + get_dim_to_pack_along, + get_multimodal_keys_from_processor, +) +from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, +) +from nemo_rl.distributed.virtual_cluster import init_ray +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.environments.vlm_environment import VLMEnvironment +from nemo_rl.models.generation 
import configure_generation_config +from nemo_rl.utils.config import load_config, parse_hydra_overrides +from nemo_rl.utils.logger import get_next_experiment_dir + +OmegaConf.register_new_resolver("mul", lambda a, b: a * b) + + +def parse_args() -> tuple[argparse.Namespace, list[str]]: + """Parse command line arguments.""" + parser = argparse.ArgumentParser(description="Run GRPO training with configuration") + parser.add_argument( + "--config", type=str, default=None, help="Path to YAML config file" + ) + # Parse known args for the script + args, overrides = parser.parse_known_args() + return args, overrides + + +# =============================================================================== +# VLM Data Processor +# =============================================================================== + + +def resolve_to_image(image_path_or_image: str | Image.Image) -> Image.Image: + """Resolve the image path to a PIL.Image object. + + image_path can be either: + - path to local file + - url to image + - base64 encoded image + """ + if isinstance(image_path_or_image, Image.Image): + return image_path_or_image + + if image_path_or_image.startswith(("http://", "https://")): + # Handle URL + response = requests.get(image_path_or_image) + response.raise_for_status() + return Image.open(BytesIO(response.content)).convert("RGB") + elif image_path_or_image.startswith("data:"): + # Handle base64 encoded image + # Format: data:image/jpeg;base64,/9j/4AAQSkZJRg... 
+ header, encoded = image_path_or_image.split(",", 1) + image_data = base64.b64decode(encoded) + return Image.open(BytesIO(image_data)).convert("RGB") + else: + # Handle local file path + return Image.open(image_path_or_image).convert("RGB") + + +def hf_data_processor( + datum_dict: dict[str, Any], + task_data_spec: TaskDataSpec, + processor: AutoProcessor, + max_seq_length: int, + idx: int, +) -> DatumSpec: + """Process a datum dictionary (directly loaded from response_datasets/<dataset_name>.py) into a DatumSpec for the VLM Environment.""" + # depending on the task, format the data differently + if task_data_spec.task_name == "clevr-cogent": + datum_dict = format_clevr_cogent_dataset(datum_dict) + elif task_data_spec.task_name == "refcoco": + datum_dict = format_refcoco_dataset(datum_dict) + elif task_data_spec.task_name == "geometry3k": + datum_dict = format_geometry3k_dataset(datum_dict) + else: + raise ValueError(f"No data processor for task {task_data_spec.task_name}") + + user_message = datum_dict["messages"] + problem = user_message[0]["content"] + extra_env_info = {"ground_truth": user_message[1]["content"]} + + message_log: LLMMessageLogType = [] + ### only one round of interaction is assumed, this can easily be extended to a conversational setting + user_message = {"role": "user", "content": []} + # + images = [] + if isinstance(problem, list): + for content in problem: + # for image, video, just append it + # for text, format the prompt to the problem + if content["type"] != "text": + user_message["content"].append(content) + if content["type"] == "image": + images.append(content["image"]) + else: + raise ValueError(f"Unsupported content type: {content['type']}") + elif content["type"] == "text": + user_message["content"].append( + { + "type": "text", + "text": task_data_spec.prompt.format(content["text"]) + if task_data_spec.prompt + else content["text"], + } + ) + else: + # conversation consists of a text-only message + user_message["content"] = 
task_data_spec.prompt.format(problem) + + images = [resolve_to_image(image) for image in images] + + # get formatted user message + if hasattr(processor, "conversation_preprocessor"): + user_message_for_chat_template = processor.conversation_preprocessor( + user_message + ) + else: + user_message_for_chat_template = user_message + + # this is the string-tokenized conversation template for the generation policy (for vllm) + string_formatted_dialog = processor.apply_chat_template( + [user_message_for_chat_template], + tokenize=False, + add_generation_prompt=True, + ) + + # this is the id-tokenized and image processed conversation template for the policy + message: dict = processor.apply_chat_template( + [user_message], + tokenize=True, + add_generation_prompt=True, + return_tensors="pt", + return_dict=True, + ) + + # add this for backward compatibility + user_message["token_ids"] = message["input_ids"][0] + # add all keys and values to the user message, and the list of keys + multimodal_keys = get_multimodal_keys_from_processor(processor) + for key in multimodal_keys: + if key in message: + user_message[key] = PackedTensor( + message[key], dim_to_pack=get_dim_to_pack_along(processor, key) + ) + + # specifically for gemma, we need to add token_type_ids to the user message as a sequence-type value + if "token_type_ids" in message: + user_message["token_type_ids"] = message["token_type_ids"][0] + + ### append to user message + message_log.append(user_message) + + length = sum(len(m["token_ids"]) for m in message_log) + loss_multiplier = 1.0 + if length >= max_seq_length: + # Treat truncated messages as text only + vllm_kwargs = { + "vllm_content": None, + "vllm_images": [], + } + + # make smaller and mask out + for chat_message in message_log: + chat_message["token_ids"] = chat_message["token_ids"][ + : min(4, max_seq_length // len(message_log)) + ] + for key, value in chat_message.items(): + if isinstance(value, PackedTensor): + chat_message[key] = 
PackedTensor.empty_like(value) + loss_multiplier = 0.0 + else: + # get the prompt content! (use this for vllm-backend that needs formatted dialog and list of images) for the entire conversation + # add images for vllm serving + vllm_kwargs = { + "vllm_content": string_formatted_dialog, + "vllm_images": images, + } + + output: DatumSpec = { + "message_log": message_log, + "length": length, + "extra_env_info": extra_env_info, + "loss_multiplier": loss_multiplier, + "idx": idx, + "task_name": task_data_spec.task_name, + **vllm_kwargs, + } + return output + + +def setup_data( + processor: AutoProcessor, + data_config: DataConfig, + env_configs: dict[str, Any], + seed: int, +) -> tuple[ + AllTaskProcessedDataset, + Optional[AllTaskProcessedDataset], + dict[str, EnvironmentInterface], + dict[str, EnvironmentInterface], +]: + """This function will create a TaskSpec, DatumSpec, and connect the two. + + task_spec contains the task name as well as prompt and system prompt modifiers that can be used by data processor + """ + print("\n▶ Setting up data...") + + # load dataset + # TODO @yukih: currently seed is not used for vlm datasets + data: Any = load_response_dataset(data_config, seed) + + task_name = data.task_name + vlm_task_spec = TaskDataSpec( + task_name=task_name, + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + + # add data processor for different tasks + task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( + defaultdict(lambda: (vlm_task_spec, hf_data_processor)) + ) + task_data_processors[task_name] = (vlm_task_spec, hf_data_processor) + + vlm_env = VLMEnvironment.options( # type: ignore # it's wrapped with ray.remote + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.vlm_environment.VLMEnvironment" + ), + "env_vars": dict(os.environ), # Pass thru all user environment variables + } + ).remote(env_configs[task_name]) + + dataset = 
AllTaskProcessedDataset( + data.formatted_ds["train"], + processor, + vlm_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + + val_dataset: Optional[AllTaskProcessedDataset] = None + if data.formatted_ds["validation"]: + val_dataset = AllTaskProcessedDataset( + data.formatted_ds["validation"], + processor, + vlm_task_spec, + task_data_processors, + max_seq_length=data_config["max_input_seq_length"], + ) + else: + val_dataset = None + + task_to_env: dict[str, EnvironmentInterface] = defaultdict(lambda: vlm_env) + task_to_env[task_name] = vlm_env + return dataset, val_dataset, task_to_env, task_to_env + + +def main() -> None: + """Main entry point.""" + args, overrides = parse_args() + + if not args.config: + args.config = os.path.join( + os.path.dirname(__file__), "configs", "vlm_grpo_3B.yaml" + ) + + config = load_config(args.config) + print(f"Loaded configuration from: {args.config}") + + if overrides: + print(f"Overrides: {overrides}") + config = parse_hydra_overrides(config, overrides) + + config: MasterConfig = OmegaConf.to_container(config, resolve=True) + print("Applied CLI overrides") + + # Print config + print("Final config:") + pprint.pprint(config) + + # Get the next experiment directory with incremented ID + config["logger"]["log_dir"] = get_next_experiment_dir(config["logger"]["log_dir"]) + print(f"📊 Using log directory: {config['logger']['log_dir']}") + if config["checkpointing"]["enabled"]: + print( + f"📊 Using checkpoint directory: {config['checkpointing']['checkpoint_dir']}" + ) + + init_ray() + + # init processor + processor = get_tokenizer(config["policy"]["tokenizer"], get_processor=True) + tokenizer = processor.tokenizer + + assert config["policy"]["generation"] is not None, ( + "A generation config is required for GRPO" + ) + config["policy"]["generation"] = configure_generation_config( + config["policy"]["generation"], processor.tokenizer + ) + if "vllm_cfg" in config["policy"]["generation"]: + 
assert ( + config["policy"]["generation"]["vllm_cfg"]["skip_tokenizer_init"] == False + ), ( + "VLMs require tokenizer to be initialized before generation, so skip_tokenizer_init must be set to False." + ) + + # setup data + # this function is local to this script, and can be extended to other VLM datasets + ( + dataset, + val_dataset, + task_to_env, + val_task_to_env, + ) = setup_data(processor, config["data"], config["env"], config["grpo"]["seed"]) + + ( + policy, + policy_generation, + cluster, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + grpo_state, + master_config, + ) = setup(config, tokenizer, dataset, val_dataset, processor=processor) + + grpo_train( + policy, + policy_generation, + dataloader, + val_dataloader, + tokenizer, + loss_fn, + task_to_env, + val_task_to_env, + logger, + checkpointer, + grpo_state, + master_config, + processor, + ) + + +if __name__ == "__main__": + main() diff --git a/nemo_rl/converters/megatron/vllm_export.py b/examples/run_vlm_sft.py similarity index 86% rename from nemo_rl/converters/megatron/vllm_export.py rename to examples/run_vlm_sft.py index 341a77c5bc..c97be905f0 100644 --- a/nemo_rl/converters/megatron/vllm_export.py +++ b/examples/run_vlm_sft.py @@ -11,3 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +from run_sft import main as sft_main + +if __name__ == "__main__": + sft_main(is_vlm=True) diff --git a/github/CODEOWNERS b/github/CODEOWNERS new file mode 100644 index 0000000000..2673715981 --- /dev/null +++ b/github/CODEOWNERS @@ -0,0 +1,61 @@ +# Order is important: later matches have higher precedence + +# List of reviewer teams +# @nvidia-nemo/rl_reviewers_algo +# @nvidia-nemo/rl_reviewers_supervised +# @nvidia-nemo/rl_reviewers_async +# @nvidia-nemo/rl_reviewers_automodel +# @nvidia-nemo/rl_reviewers_ci +# @nvidia-nemo/rl_reviewers_doc +# @nvidia-nemo/rl_reviewers_environments +# @nvidia-nemo/rl_reviewers_eval +# @nvidia-nemo/rl_reviewers_generation +# @nvidia-nemo/rl_reviewers_infra +# @nvidia-nemo/rl_reviewers_mcore +# @nvidia-nemo/rl_reviewers_multimodal +# @nvidia-nemo/rl_reviewers_models +# @nvidia-nemo/rl_reviewers_perf + +# Catch-all if not defined +* @nvidia-nemo/rl_maintainers + +# CI/Test +/.github/ @nvidia-nemo/rl_reviewers_ci +/tests/ @nvidia-nemo/rl_reviewers_ci + +# Examples and configs (TODO: Add all algo subteams eventually) +/examples/ @nvidia-nemo/rl_reviewers_algo +/examples/**/*vlm* @nvidia-nemo/rl_reviewers_multimodal +/examples/**/*sft* @nvidia-nemo/rl_reviewers_supervised +/examples/**/*dpo* @nvidia-nemo/rl_reviewers_supervised +/examples/**/*rm* @nvidia-nemo/rl_reviewers_supervised + +# Core (TODO: Add all algo subteams eventually) +/nemo_rl/algorithms/ @nvidia-nemo/rl_reviewers_algo +/nemo_rl/algorithms/sft.py @nvidia-nemo/rl_reviewers_supervised +/nemo_rl/algorithms/dpo.py @nvidia-nemo/rl_reviewers_supervised +/nemo_rl/algorithms/rm.py @nvidia-nemo/rl_reviewers_supervised +/nemo_rl/environments/ @nvidia-nemo/rl_reviewers_environments @nvidia-nemo/rl_reviewers_algo +/nemo_rl/data/ @nvidia-nemo/rl_reviewers_algo +/nemo_rl/experience/ @nvidia-nemo/rl_reviewers_environments @nvidia-nemo/rl_reviewers_algo +/nemo_rl/evals/ @nvidia-nemo/rl_reviewers_eval +/nemo_rl/distributed/ @nvidia-nemo/rl_reviewers_infra +/nemo_rl/models/ 
@nvidia-nemo/rl_reviewers_automodel @nvidia-nemo/rl_reviewers_mcore +/nemo_rl/models/dtensor/ @nvidia-nemo/rl_reviewers_automodel +/nemo_rl/models/generation/ @nvidia-nemo/rl_reviewers_generation +/nemo_rl/models/generation/fp8.py @nvidia-nemo/rl_reviewers_perf +/nemo_rl/models/megatron/ @nvidia-nemo/rl_reviewers_mcore +/nemo_rl/models/policy/dtensor* @nvidia-nemo/rl_reviewers_automodel +/nemo_rl/models/policy/megatron* @nvidia-nemo/rl_reviewers_mcore +/nemo_rl/utils/flop* @nvidia-nemo/rl_reviewers_perf + +# Infra, tooling, and docs +/docker/ @nvidia-nemo/rl_reviewers_infra +/docs/ @nvidia-nemo/rl_reviewers_doc +/3rdparty/Automodel-workspace/ @nvidia-nemo/rl_reviewers_automodel +/3rdparty/Megatron-LM-workspace/ @nvidia-nemo/rl_reviewers_mcore +/3rdparty/Megatron-Bridge-workspace/ @nvidia-nemo/rl_reviewers_mcore +/ray.sub @nvidia-nemo/rl_reviewers_infra + +# Codeowners +/.github/CODEOWNERS @nvidia-nemo/rl_maintainers diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/github/PULL_REQUEST_TEMPLATE.md similarity index 100% rename from .github/PULL_REQUEST_TEMPLATE.md rename to github/PULL_REQUEST_TEMPLATE.md diff --git a/github/actions/test-template/action.yml b/github/actions/test-template/action.yml new file mode 100644 index 0000000000..0c7306f58f --- /dev/null +++ b/github/actions/test-template/action.yml @@ -0,0 +1,226 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+name: "Test Template" +description: "Template for running NeMo tests in a containerized environment" + +inputs: + runner: + description: "Runner to use for test" + required: true + script: + description: "Test script to execute" + required: true + is_optional: + description: "Failure will cancel all other tests if set to true" + required: false + default: "false" + is_doc_test: + description: "Upload coverage as doc test" + required: false + default: "false" + is_unit_test: + description: "Upload coverage as unit test" + required: false + default: "false" + image: + description: "Image to use for test" + required: false + default: "nemo_rl_container" + cpu-only: + description: "Run tests on CPU only" + required: false + default: "false" + azure-client-id: + description: "Azure Client ID" + required: true + azure-tenant-id: + description: "Azure Tenant ID" + required: true + azure-subscription-id: + description: "Azure Subscription ID" + required: true + has-azure-credentials: + description: "Has Azure credentials" + required: false + default: "false" + is_fork_pr: + description: "Whether this is a pull request from a fork" + required: false + default: "false" + +runs: + using: "composite" + steps: + - name: Install Azure CLI + if: ${{ inputs.has-azure-credentials == 'true' }} + shell: bash + run: | + curl -sL https://aka.ms/InstallAzureCLIDeb | sudo bash + + - name: Azure Login + if: ${{ inputs.has-azure-credentials == 'true' }} + uses: azure/login@532459ea530d8321f2fb9bb10d1e0bcf23869a43 # v3 + with: + client-id: ${{ inputs.azure-client-id }} + tenant-id: ${{ inputs.azure-tenant-id }} + subscription-id: ${{ inputs.azure-subscription-id }} + + - name: Azure ACR Login + if: ${{ inputs.has-azure-credentials == 'true' }} + shell: bash + run: | + az acr login --name nemoci + + - name: Azure Fileshare + if: ${{ inputs.has-azure-credentials == 'true' && inputs.is_unit_test == 'false' && inputs.is_doc_test == 'false' }} + shell: bash + id: azure-fileshare + run: | + sudo 
apt update + sudo apt install -y cifs-utils + + RESOURCE_GROUP_NAME="azure-gpu-vm-runner_group" + STORAGE_ACCOUNT_NAME="nemocistorageaccount2" + FILE_SHARE_NAME="fileshare" + + MNT_ROOT="/media" + MNT_PATH="$MNT_ROOT/$STORAGE_ACCOUNT_NAME/$FILE_SHARE_NAME" + + echo "MNT_PATH=$MNT_PATH" | tee -a "$GITHUB_OUTPUT" + + sudo mkdir -p $MNT_PATH + + # Create a folder to store the credentials for this storage account and + # any other that you might set up. + CREDENTIAL_ROOT="/etc/smbcredentials" + sudo mkdir -p "/etc/smbcredentials" + + # Get the storage account key for the indicated storage account. + # You must be logged in with az login and your user identity must have + # permissions to list the storage account keys for this command to work. + STORAGE_ACCOUNT_KEY=$(az storage account keys list \ + --resource-group $RESOURCE_GROUP_NAME \ + --account-name $STORAGE_ACCOUNT_NAME \ + --query "[0].value" --output tsv | tr -d '"') + + # Create the credential file for this individual storage account + SMB_CREDENTIAL_FILE="$CREDENTIAL_ROOT/$STORAGE_ACCOUNT_NAME.cred" + if [ ! -f $SMB_CREDENTIAL_FILE ]; then + echo "username=$STORAGE_ACCOUNT_NAME" | sudo tee $SMB_CREDENTIAL_FILE > /dev/null + echo "password=$STORAGE_ACCOUNT_KEY" | sudo tee -a $SMB_CREDENTIAL_FILE > /dev/null + else + echo "The credential file $SMB_CREDENTIAL_FILE already exists, and was not modified." + fi + + # Change permissions on the credential file so only root can read or modify the password file. 
+ sudo chmod 600 $SMB_CREDENTIAL_FILE + + # This command assumes you have logged in with az login + HTTP_ENDPOINT=$(az storage account show --resource-group $RESOURCE_GROUP_NAME --name $STORAGE_ACCOUNT_NAME --query "primaryEndpoints.file" --output tsv | tr -d '"') + SMB_PATH=$(echo $HTTP_ENDPOINT | cut -c7-${#HTTP_ENDPOINT})$FILE_SHARE_NAME + + STORAGE_ACCOUNT_KEY=$(az storage account keys list --resource-group $RESOURCE_GROUP_NAME --account-name $STORAGE_ACCOUNT_NAME --query "[0].value" --output tsv | tr -d '"') + + sudo mount -t cifs $SMB_PATH $MNT_PATH -o credentials=$SMB_CREDENTIAL_FILE,serverino,nosharesock,actimeo=30,mfsymlinks + + ls -al $MNT_PATH/TestData + + - name: Docker system cleanup + shell: bash + run: | + docker system prune -af --filter "until=48h" --force || true + + - name: Docker pull image + shell: bash + run: | + docker pull nemoci.azurecr.io/${{ inputs.image }}:${{ github.run_id }} + + - name: Create UUID + id: uuid + shell: bash + run: | + echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT" + + - name: Checkout NeMo + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + env: + DIR: ${{ github.run_id }} + with: + submodules: 'recursive' + path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/nemo-rl + + - name: Run tests + id: test + shell: bash + run: | + COVERAGE_PREFIX=$([[ "${{ inputs.is_doc_test }}" == "true" ]] && echo "doc-test" || ([[ "${{ inputs.is_unit_test }}" == "true" ]] && echo "unit-test" || echo "e2e")) + echo "coverage-prefix=$COVERAGE_PREFIX" | tee -a "$GITHUB_OUTPUT" + + docker run --rm -u root --runtime=nvidia --gpus all \ + --shm-size=64g \ + --env TRANSFORMERS_OFFLINE=0 \ + --env HYDRA_FULL_ERROR=1 \ + --env HF_HOME=/home/TestData/nemo-rl/hf_home \ + --env HF_DATASETS_CACHE=/home/TestData/nemo-rl/hf_datasets_cache \ + --env NEMO_RL_REPO_DIR=/opt/nemo-rl \ + --env HF_TOKEN \ + ${{ inputs.is_fork_pr == 'true' && '--env HF_HUB_OFFLINE=1' || '' }} \ + --volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id 
}}/nemo-rl:/opt/nemo-rl \ + --volume $GITHUB_ACTION_DIR:$GITHUB_ACTION_DIR \ + --volume /mnt/datadrive/TestData/nemo-rl/datasets:/opt/nemo-rl/datasets:ro \ + --volume /mnt/datadrive/TestData/nemo-rl/checkpoints:/home/TestData/nemo-rl/checkpoints:ro \ + --volume /mnt/datadrive/TestData/nemo-rl/hf_home/hub:/home/TestData/nemo-rl/hf_home/hub \ + --volume /mnt/datadrive/TestData/nemo-rl/hf_datasets_cache:/home/TestData/nemo-rl/hf_datasets_cache \ + nemoci.azurecr.io/${{ inputs.image }}:${{ github.run_id }} bash -eux -o pipefail -c '\ + git config --global --add safe.directory /opt/nemo-rl + # This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary + umask 000 + bash tests/${{ inputs.is_doc_test == 'true' && 'docs' || (inputs.is_unit_test == 'true' && 'unit' || 'functional') }}/${{ inputs.script }}.sh && \ + echo "Finished successfully." || echo "Did not finish."' 2>&1 | tee err.log + + - name: Check result + id: check + shell: bash + run: | + coverage_report=coverage-${{ steps.test.outputs.coverage-prefix }}-${{ github.run_id }}-$(uuidgen) + echo "coverage_report=$coverage_report" >> "$GITHUB_OUTPUT" + + IS_SUCCESS=$(tail -n 1 err.log | grep -q "Finished successfully." && echo "true" || echo "false") + + if [[ "$IS_SUCCESS" == "false" && "${{ inputs.is_optional }}" == "true" ]]; then + echo "::warning:: Test failed, but displayed as successful because it is marked as optional." + IS_SUCCESS=true + fi + + if [[ "$IS_SUCCESS" == "false" ]]; then + echo Test did not finish successfully.
+ exit 1 + fi + + exit 0 + + - name: Upload artifacts + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + if: ${{ steps.check.outputs.coverage_report != 'none' }} + with: + name: ${{ steps.check.outputs.coverage_report }} + path: | + ${{ github.workspace }}/${{ github.run_id }}/${{steps.uuid.outputs.id }}/nemo-rl/tests/.coverage + include-hidden-files: true + + - name: Container shutdown + if: always() + shell: bash + run: | + rm -rf $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }} || true diff --git a/github/copy-pr-bot.yml b/github/copy-pr-bot.yml new file mode 100644 index 0000000000..4cfbdc7f05 --- /dev/null +++ b/github/copy-pr-bot.yml @@ -0,0 +1,3 @@ +enabled: true +auto_sync_draft: false +auto_sync_ready: true diff --git a/.github/labeler.yml b/github/labeler.yml similarity index 100% rename from .github/labeler.yml rename to github/labeler.yml diff --git a/github/workflows/_automodel_integration_check.yml b/github/workflows/_automodel_integration_check.yml new file mode 100644 index 0000000000..4bbcb42981 --- /dev/null +++ b/github/workflows/_automodel_integration_check.yml @@ -0,0 +1,264 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "Automodel Integration Files Consistency Check" + +on: + workflow_call: + inputs: + base_ref: + required: true + type: string + description: "Target branch to check against" + head_ref: + required: true + type: string + description: "Feature branch name" + pr_number: + required: true + type: string + description: "Pull request number" + head_sha: + required: true + type: string + description: "Head commit SHA of the feature branch" + +jobs: + check: + name: Related Files Synchronization Check + runs-on: ubuntu-latest + outputs: + needs_attention: ${{ steps.check.outputs.needs_attention }} + comment_body: ${{ steps.check.outputs.comment_body }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Fetch target branch reference + run: | + git fetch origin ${{ inputs.base_ref }} + + - name: Check parallel plans sync status + id: check + shell: bash -x -e {0} + run: | + echo "Checking if parallel plan files are synchronized..." + + # Define the file paths + PARALLELIZE_FILE="nemo_rl/models/dtensor/parallelize.py" + OPTIMIZED_TP_PLANS_FILE="3rdparty/Automodel-workspace/Automodel/nemo_automodel/components/distributed/optimized_tp_plans.py" + PARALLELIZER_FILE="3rdparty/Automodel-workspace/Automodel/nemo_automodel/components/distributed/parallelizer.py" + + needs_attention=0 + comment_body="" + + # Check if parallelize.py was modified in this PR + if git diff --name-only origin/${{ inputs.base_ref }}..HEAD | grep -q "^${PARALLELIZE_FILE}$"; then + echo "✅ Found changes in ${PARALLELIZE_FILE}" + parallelize_changed=1 + else + echo "ℹ️ No changes found in ${PARALLELIZE_FILE}" + parallelize_changed=0 + fi + + # Check if optimized_tp_plans.py was modified in this PR + if git diff --name-only origin/${{ inputs.base_ref }}..HEAD | grep -q "^${OPTIMIZED_TP_PLANS_FILE}$"; then + echo "✅ Found changes in ${OPTIMIZED_TP_PLANS_FILE}" + optimized_tp_plans_changed=1 + else + echo "ℹ️ No changes found in 
${OPTIMIZED_TP_PLANS_FILE}" + optimized_tp_plans_changed=0 + fi + + # Check if parallelizer.py was modified in this PR + if git diff --name-only origin/${{ inputs.base_ref }}..HEAD | grep -q "^${PARALLELIZER_FILE}$"; then + echo "✅ Found changes in ${PARALLELIZER_FILE}" + parallelizer_changed=1 + else + echo "ℹ️ No changes found in ${PARALLELIZER_FILE}" + parallelizer_changed=0 + fi + + # Check if all files exist + if [[ ! -f "$PARALLELIZE_FILE" ]]; then + echo "⚠️ Warning: ${PARALLELIZE_FILE} does not exist" + fi + + if [[ ! -f "$OPTIMIZED_TP_PLANS_FILE" ]]; then + echo "⚠️ Warning: ${OPTIMIZED_TP_PLANS_FILE} does not exist" + fi + + if [[ ! -f "$PARALLELIZER_FILE" ]]; then + echo "⚠️ Warning: ${PARALLELIZER_FILE} does not exist" + fi + + # Analyze the relationship between the changes + # Success case: If parallelize.py is changed, either optimized_tp_plans.py OR parallelizer.py should also be changed + if [[ $parallelize_changed -eq 1 && $optimized_tp_plans_changed -eq 0 && $parallelizer_changed -eq 0 ]]; then + echo "❌ parallelize.py was modified but neither optimized_tp_plans.py nor parallelizer.py was updated" + needs_attention=1 + comment_body+="### ⚠️ Parallel Plans Synchronization Warning"$'\n\n' + comment_body+="The file \`${PARALLELIZE_FILE}\` was modified in this PR, but neither \`${OPTIMIZED_TP_PLANS_FILE}\` nor \`${PARALLELIZER_FILE}\` was updated."$'\n\n' + comment_body+="**Why this matters:**"$'\n' + comment_body+="These files contain similar parallel plan implementations that should be kept synchronized to ensure consistency across the codebase."$'\n\n' + comment_body+="**Action required:**"$'\n' + comment_body+="- Please review if the changes in \`${PARALLELIZE_FILE}\` should also be applied to \`${OPTIMIZED_TP_PLANS_FILE}\` or \`${PARALLELIZER_FILE}\`"$'\n' + comment_body+="- Update the appropriate related file(s) if necessary to maintain functional consistency"$'\n' + comment_body+="- Request access to the 
[NVIDIA-NeMo/Automodel](https://github.com/NVIDIA-NeMo/Automodel/) repository, create a PR against the \`nemo-rl-submodule\` branch, and update the Automodel submodule in the nemo-rl index"$'\n' + comment_body+="- Add @ffrujeri as a reviewer of this PR if you have any questions about the consistency requirements"$'\n' + comment_body+="- If the files are intentionally different, please add a comment in the PR explaining why"$'\n\n' + comment_body+="**Files to check:**"$'\n' + comment_body+="- Modified: \`${PARALLELIZE_FILE}\`"$'\n' + comment_body+="- Not modified: \`${OPTIMIZED_TP_PLANS_FILE}\`"$'\n' + comment_body+="- Not modified: \`${PARALLELIZER_FILE}\`"$'\n\n' + + else + echo "ℹ️ No consistency issues detected for parallel plan files" + # Don't set comment_body in this case to avoid unnecessary comments + fi + + echo "" + echo "Checking if dtensor policy worker files are synchronized..." + + # Define the dtensor policy worker file paths + DTENSOR_POLICY_WORKER_FILE="nemo_rl/models/policy/dtensor_policy_worker.py" + DTENSOR_POLICY_WORKER_V2_FILE="nemo_rl/models/policy/dtensor_policy_worker_v2.py" + + # Check if dtensor_policy_worker.py was modified in this PR + if git diff --name-only origin/${{ inputs.base_ref }}..HEAD | grep -q "^${DTENSOR_POLICY_WORKER_FILE}$"; then + echo "✅ Found changes in ${DTENSOR_POLICY_WORKER_FILE}" + dtensor_worker_changed=1 + else + echo "ℹ️ No changes found in ${DTENSOR_POLICY_WORKER_FILE}" + dtensor_worker_changed=0 + fi + + # Check if dtensor_policy_worker_v2.py was modified in this PR + if git diff --name-only origin/${{ inputs.base_ref }}..HEAD | grep -q "^${DTENSOR_POLICY_WORKER_V2_FILE}$"; then + echo "✅ Found changes in ${DTENSOR_POLICY_WORKER_V2_FILE}" + dtensor_worker_v2_changed=1 + else + echo "ℹ️ No changes found in ${DTENSOR_POLICY_WORKER_V2_FILE}" + dtensor_worker_v2_changed=0 + fi + + # Check if both dtensor policy worker files exist + if [[ ! 
-f "$DTENSOR_POLICY_WORKER_FILE" ]]; then + echo "⚠️ Warning: ${DTENSOR_POLICY_WORKER_FILE} does not exist" + fi + + if [[ ! -f "$DTENSOR_POLICY_WORKER_V2_FILE" ]]; then + echo "⚠️ Warning: ${DTENSOR_POLICY_WORKER_V2_FILE} does not exist" + fi + + # Analyze the relationship between the dtensor policy worker changes + if [[ $dtensor_worker_changed -eq 1 && $dtensor_worker_v2_changed -eq 0 ]]; then + echo "❌ dtensor_policy_worker.py was modified but dtensor_policy_worker_v2.py was not updated" + needs_attention=1 + comment_body+="### ⚠️ DTensor Policy Worker Synchronization Warning"$'\n\n' + comment_body+="The file \`${DTENSOR_POLICY_WORKER_FILE}\` was modified in this PR, but \`${DTENSOR_POLICY_WORKER_V2_FILE}\` was not updated."$'\n\n' + comment_body+="**Why this matters:**"$'\n' + comment_body+="These files contain related DTensor policy worker implementations that should be kept synchronized to ensure consistency across different versions."$'\n\n' + comment_body+="**Action required:**"$'\n' + comment_body+="- Please review if the changes in \`${DTENSOR_POLICY_WORKER_FILE}\` should also be applied to \`${DTENSOR_POLICY_WORKER_V2_FILE}\`"$'\n' + comment_body+="- Update \`${DTENSOR_POLICY_WORKER_V2_FILE}\` if necessary to maintain consistency"$'\n' + comment_body+="- If the files are intentionally different, please add a comment in the PR explaining why"$'\n\n' + comment_body+="**Files to check:**"$'\n' + comment_body+="- Modified: \`${DTENSOR_POLICY_WORKER_FILE}\`"$'\n' + comment_body+="- Not modified: \`${DTENSOR_POLICY_WORKER_V2_FILE}\`"$'\n\n' + + elif [[ $dtensor_worker_changed -eq 0 && $dtensor_worker_v2_changed -eq 1 ]]; then + echo "❌ dtensor_policy_worker_v2.py was modified but dtensor_policy_worker.py was not updated" + needs_attention=1 + comment_body+="### ⚠️ DTensor Policy Worker Synchronization Warning"$'\n\n' + comment_body+="The file \`${DTENSOR_POLICY_WORKER_V2_FILE}\` was modified in this PR, but \`${DTENSOR_POLICY_WORKER_FILE}\` was not 
updated."$'\n\n' + comment_body+="**Why this matters:**"$'\n' + comment_body+="These files contain related DTensor policy worker implementations that should be kept synchronized to ensure consistency across different versions."$'\n\n' + comment_body+="**Action required:**"$'\n' + comment_body+="- Please review if the changes in \`${DTENSOR_POLICY_WORKER_V2_FILE}\` should also be applied to \`${DTENSOR_POLICY_WORKER_FILE}\`"$'\n' + comment_body+="- Update \`${DTENSOR_POLICY_WORKER_FILE}\` if necessary to maintain consistency"$'\n' + comment_body+="- If the files are intentionally different, please add a comment in the PR explaining why"$'\n\n' + comment_body+="**Files to check:**"$'\n' + comment_body+="- Modified: \`${DTENSOR_POLICY_WORKER_V2_FILE}\`"$'\n' + comment_body+="- Not modified: \`${DTENSOR_POLICY_WORKER_FILE}\`"$'\n\n' + + elif [[ $dtensor_worker_changed -eq 1 && $dtensor_worker_v2_changed -eq 1 ]]; then + echo "✅ Both DTensor policy worker files were modified" + comment_body+="### ✅ DTensor Policy Worker Synchronization Check"$'\n\n' + comment_body+="Both DTensor policy worker files were modified in this PR:"$'\n' + comment_body+="- \`${DTENSOR_POLICY_WORKER_FILE}\`"$'\n' + comment_body+="- \`${DTENSOR_POLICY_WORKER_V2_FILE}\`"$'\n\n' + comment_body+="Please ensure that the changes are consistent between both files where applicable."$'\n\n' + + else + echo "ℹ️ No DTensor policy worker files were modified in this PR" + # Don't set comment_body in this case to avoid unnecessary comments + fi + + # Set outputs + echo "needs_attention=$needs_attention" >> $GITHUB_OUTPUT + if [[ -n "$comment_body" ]]; then + echo "comment_body<<EOF" >> $GITHUB_OUTPUT + echo "$comment_body" >> $GITHUB_OUTPUT + echo "EOF" >> $GITHUB_OUTPUT + fi + + if [[ $needs_attention -eq 1 ]]; then + echo "" + echo "⚠️ Files consistency needs attention" + echo "Please review the changes and ensure related files are properly synchronized" + else + echo "" + echo "✅ File consistency check 
completed" + fi + + create_artifact: + name: Create Comment Artifact + needs: [check] + runs-on: ubuntu-latest + if: always() && needs.check.outputs.comment_body != '' + steps: + - name: Create comment file + uses: actions/github-script@v7 + with: + script: | + const fs = require('fs'); + + const needsAttention = ${{ toJSON(needs.check.outputs.needs_attention) }} === '1'; + const title = needsAttention ? + '## ⚠️ File Consistency Check' : + '## ℹ️ File Consistency Check'; + + const headSha = ${{ toJSON(inputs.head_sha) }}; + const prNumber = ${{ toJSON(inputs.pr_number) }}; + const headRef = ${{ toJSON(inputs.head_ref) }}; + const checkOutputs = ${{ toJSON(needs.check.outputs.comment_body) }}; + + const commentBody = title + '\n\n' + + '**Check based on commit:** ' + headSha + ' (PR #' + prNumber + ' from `' + headRef + '`)\n\n' + + checkOutputs + '\n\n' + + '---\n' + + '<sub>This check ensures that related file implementations remain synchronized across the codebase. If you believe this warning is incorrect or the files should intentionally differ, please add a comment explaining the reasoning.</sub>'; + + fs.writeFileSync('comment.txt', commentBody); + console.log('Comment body written to comment.txt'); + + - name: Upload comment artifact + uses: actions/upload-artifact@v4 + with: + name: automodel-integration-check + path: comment.txt + retention-days: 1 diff --git a/github/workflows/_healthcheck_vm.yml b/github/workflows/_healthcheck_vm.yml new file mode 100644 index 0000000000..4c46c5ee90 --- /dev/null +++ b/github/workflows/_healthcheck_vm.yml @@ -0,0 +1,118 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +name: ~monitor a single VM + +on: + workflow_call: + inputs: + vm: + type: string + description: Name of VM + required: true + n_gpus: + type: string + description: Number of GPUs this VM has + required: true + is_recheck: + type: boolean + description: Whether this is a recheck after reboot + required: false + default: false + secrets: + SLACK_WEBHOOK_ADMIN: + description: Slack webhook admin identifier + required: true + SLACK_GITHUB_CI_WEBHOOK: + description: Slack webhook URL for notifications + required: true + VM_KEY: + description: VM user credentials + required: true + PAT: + description: GitHub Personal Access Token + required: true + +jobs: + check-status-and-maybe-shutdown: + environment: main + runs-on: ${{ inputs.vm }} + outputs: + status: ${{ steps.status.outputs.main }} + reboot_needed: ${{ steps.status.outputs.reboot_needed }} + steps: + - name: Check status + id: status + run: | + echo "🔍 Running health check on VM ${{ inputs.vm }}" + + docker run --rm --runtime=nvidia --gpus ${{ inputs.n_gpus }} ubuntu nvidia-smi + + NUM_GPUS=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) + + if [[ $NUM_GPUS -ne ${{ inputs.n_gpus }} ]]; then + echo "Issues with GPU detected" + echo "main=degraded" >> "$GITHUB_OUTPUT" + echo "reboot_needed=true" >> "$GITHUB_OUTPUT" + else + echo "✅ VM ${{ inputs.vm }} is healthy - found $NUM_GPUS/${{ inputs.n_gpus }} GPUs" + echo "main=healthy" >> "$GITHUB_OUTPUT" + echo "reboot_needed=false" >> "$GITHUB_OUTPUT" + fi + + - name: Save reboot status to artifact + if: ${{ inputs.is_recheck != true }} + run: | + 
mkdir -p /tmp/healthcheck-results + echo "${{ steps.status.outputs.reboot_needed }}" > /tmp/healthcheck-results/${{ inputs.vm }}-reboot-needed.txt + echo "${{ steps.status.outputs.main }}" > /tmp/healthcheck-results/${{ inputs.vm }}-status.txt + + - name: Upload healthcheck results + if: ${{ inputs.is_recheck != true }} + uses: actions/upload-artifact@v4 + with: + name: healthcheck-${{ inputs.vm }} + path: /tmp/healthcheck-results/ + retention-days: 1 + + - name: Take Action on Issues + if: ${{ (steps.status.outputs.main == 'degraded' || failure()) && inputs.is_recheck != true }} + continue-on-error: true + run: | + if [[ "${{ steps.status.outputs.reboot_needed }}" == "true" ]]; then + echo "Rebooting VM..." + echo "Scheduling reboot in 30 seconds to allow workflow to complete..." + echo '${{ secrets.VM_KEY }}' | sudo -S bash -c 'nohup bash -c "sleep 30 && cd /home/azureuser/actions-runner && ./svc.sh stop && sleep 30 && reboot" > /dev/null 2>&1 &' + echo "Reboot scheduled, workflow will continue..." + fi + + - name: Send Slack Alert & Stop Service for Persistent Issues + if: ${{ (steps.status.outputs.main == 'degraded' || failure()) && inputs.is_recheck == true }} + continue-on-error: true + run: | + MESSAGE='{ + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": ":alert: VM bot 🤖: Hey <!subteam^${{ secrets.SLACK_WEBHOOK_ADMIN }}>: VM `${{ inputs.vm }}` still has issues after reboot - stopping service and needs manual intervention." 
+ } + } + ] + }' + + curl -X POST -H "Content-type: application/json" --data "$MESSAGE" ${{ secrets.SLACK_GITHUB_CI_WEBHOOK }} + + echo "Recheck detected persistent issues - stopping runner service to take VM offline" + echo '${{ secrets.VM_KEY }}' | sudo -S bash -c 'nohup bash -c "sleep 30 && cd /home/azureuser/actions-runner && ./svc.sh stop" > /dev/null 2>&1 &' diff --git a/github/workflows/_pr_comment.yml b/github/workflows/_pr_comment.yml new file mode 100644 index 0000000000..fea753fbe2 --- /dev/null +++ b/github/workflows/_pr_comment.yml @@ -0,0 +1,183 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +name: "PR Comment" + +on: + workflow_call: + inputs: + pr_number: + required: true + type: string + description: "Pull request number" + artifact_name: + required: true + type: string + description: "Name of the artifact containing the comment body" + head_sha: + required: true + type: string + description: "Head commit SHA of the pull request" + max_retries: + required: false + type: number + default: 30 + description: "Maximum number of polling attempts" + sleep_duration: + required: false + type: number + default: 10 + description: "Sleep duration in seconds between polling attempts" + +jobs: + comment: + name: Comment on PR + runs-on: ubuntu-latest + steps: + - name: Wait for and download artifact + id: find-artifact + uses: actions/github-script@v7 + with: + script: | + const artifactName = ${{ toJSON(inputs.artifact_name) }}; + const headSha = ${{ toJSON(inputs.head_sha) }}; + const prNumber = ${{ toJSON(inputs.pr_number) }}; + const maxRetries = ${{ inputs.max_retries }}; + const sleepDuration = ${{ inputs.sleep_duration }}; + + console.log(`Waiting for artifact: ${artifactName}`); + console.log(`PR: #${prNumber}, Head SHA: ${headSha}`); + console.log(`Max retries: ${maxRetries}, Sleep duration: ${sleepDuration}s`); + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + console.log(`Attempt ${attempt}/${maxRetries}: Searching for artifact in workflow runs for commit ${headSha}...`); + + try { + // Get workflow runs for the specific commit SHA + console.log(`Searching for workflow runs for commit: ${headSha}`); + const workflowRuns = await github.rest.actions.listWorkflowRunsForRepo({ + owner: context.repo.owner, + repo: context.repo.repo, + head_sha: headSha, + per_page: 50 + }); + + console.log(`Found ${workflowRuns.data.workflow_runs.length} workflow runs for commit ${headSha}`); + + // Search through each workflow run for the artifact + for (const run of workflowRuns.data.workflow_runs) { + console.log(`Checking workflow run: ${run.name} (ID: 
${run.id})`); + + try { + const artifactsResponse = await github.rest.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: run.id + }); + + const artifact = artifactsResponse.data.artifacts.find(a => a.name === artifactName); + if (artifact) { + console.log(`Found artifact '${artifactName}' in workflow '${run.name}' (run ${run.id})`); + + // Download the artifact using GitHub API + console.log(`Downloading artifact ${artifact.id}...`); + const download = await github.rest.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: artifact.id, + archive_format: 'zip' + }); + + // Save the artifact to a file + const fs = require('fs'); + fs.writeFileSync('artifact.zip', Buffer.from(download.data)); + console.log('Artifact downloaded successfully'); + + core.setOutput('artifact_found', 'true'); + return; // Exit successfully + } + } catch (runError) { + console.log(`Error checking workflow run ${run.id}: ${runError.message}`); + // Continue to next run + } + } + + if (attempt < maxRetries) { + console.log(`Artifact not found in any workflow runs for commit ${headSha}, waiting ${sleepDuration} seconds...`); + await new Promise(resolve => setTimeout(resolve, sleepDuration * 1000)); + } + } catch (error) { + console.log(`Error searching for artifact: ${error.message}`); + if (attempt < maxRetries) { + await new Promise(resolve => setTimeout(resolve, sleepDuration * 1000)); + } + } + } + + console.log(`Artifact '${artifactName}' not found in any workflow runs for commit ${headSha} (PR #${prNumber}) after ${maxRetries} attempts`); + console.log('No comment will be posted as no artifact was found.'); + core.setOutput('artifact_found', 'false'); + return; + + - name: Extract comment body + id: extract-comment + if: steps.find-artifact.outputs.artifact_found == 'true' + run: | + # Unzip the artifact + unzip -q artifact.zip + + # Find the comment body file + if [ -f "comment.txt" ]; then + 
COMMENT_FILE="comment.txt" + elif [ -f "comment.md" ]; then + COMMENT_FILE="comment.md" + elif [ -f "comment" ]; then + COMMENT_FILE="comment" + else + # Take the first text file found + COMMENT_FILE=$(find . -maxdepth 1 -type f -name "*.txt" -o -name "*.md" | head -1) + if [ -z "$COMMENT_FILE" ]; then + echo "No comment file found in artifact" + exit 1 + fi + fi + + echo "Found comment file: $COMMENT_FILE" + + # Read the comment body and set it as output + { + echo 'COMMENT_BODY<<EOF' + cat "$COMMENT_FILE" + echo + echo 'EOF' + } >> "$GITHUB_OUTPUT" + + - name: Comment on PR + if: steps.find-artifact.outputs.artifact_found == 'true' + uses: actions/github-script@v7 + with: + script: | + const commentBody = ${{ toJSON(steps.extract-comment.outputs.COMMENT_BODY) }}; + const prNumber = ${{ toJSON(inputs.pr_number) }}; + + console.log(`Creating comment on PR #${prNumber}`); + + await github.rest.issues.createComment({ + issue_number: parseInt(prNumber), + owner: context.repo.owner, + repo: context.repo.repo, + body: commentBody + }); + + console.log('Comment created successfully'); diff --git a/.github/workflows/_submodule_check.yml b/github/workflows/_submodule_check.yml similarity index 93% rename from .github/workflows/_submodule_check.yml rename to github/workflows/_submodule_check.yml index 6930432c2c..39a6c93dde 100644 --- a/.github/workflows/_submodule_check.yml +++ b/github/workflows/_submodule_check.yml @@ -47,55 +47,56 @@ jobs: uses: actions/checkout@v4 with: submodules: 'recursive' - + - name: Fetch target branch reference run: | git fetch origin ${{ inputs.base_ref }} - + - name: Check submodule fast-forward status id: check shell: bash -x -e {0} run: | echo "Checking submodules are fast-forwarded..." 
- + # Get current submodule status echo "Current submodule status:" git submodule status - + failed=0 changed=0 success_body="" failed_body="" - + # Process each submodule from git submodule status while read -r line; do # Extract commit and path from: " <commit> <path> (<branch_info>)" current_commit=$(echo "$line" | awk '{print $1}' | sed 's/^[+-]//') submodule_path=$(echo "$line" | awk '{print $2}') - + if [[ -z "$current_commit" ]] || [[ -z "$submodule_path" ]]; then continue fi - + submodule_name=$(basename "$submodule_path") echo "" echo "Checking $submodule_name at $submodule_path" echo "Current commit: $current_commit" - + # Get target branch commit for this submodule target_commit=$(git ls-tree origin/${{ inputs.base_ref }} "$submodule_path" | awk '{print $3}') - + if [[ -z "$target_commit" ]]; then - echo "❌ Could not find $submodule_name in ${{ inputs.base_ref }} branch" - failed=1 + echo "✅ $submodule_name: New submodule being added (not present in ${{ inputs.base_ref }} branch)" + changed=1 + success_body+="$submodule_name: ✅ New submodule being added"$'\n' continue fi - + echo "Target commit: $target_commit" - + # Analyze the relationship between target and current commits cd "$submodule_path" - + # Check if this is a shallow repository and unshallow if needed if git rev-parse --is-shallow-repository >/dev/null 2>&1 && [ "$(git rev-parse --is-shallow-repository)" = "true" ]; then echo "📦 $submodule_name: Detected shallow clone, fetching full history..." @@ -103,7 +104,7 @@ jobs: echo "⚠️ Warning: Failed to unshallow repository. Ancestry checks may be limited." 
} fi - + # Get GitHub repository URL for comment remote_url=$(git remote get-url origin 2>/dev/null || echo "") if [[ "$remote_url" == *.git ]]; then @@ -111,12 +112,12 @@ jobs: else github_repo="$remote_url" fi - + # Case 1: Same commit if [[ "$current_commit" = "$target_commit" ]]; then echo "✅ $submodule_name: PR branch matches ${{ inputs.base_ref }} branch (same commit)" # No change, so don't add to changed count or comment - + # Case 2: Check if target commit is an ancestor of current commit (current is fast-forward) elif git merge-base --is-ancestor "$target_commit" "$current_commit" 2>/dev/null; then echo "✅ $submodule_name: PR branch is ahead of ${{ inputs.base_ref }} branch (fast-forward)" @@ -124,7 +125,7 @@ jobs: git log --oneline --graph "$target_commit".."$current_commit" 2>/dev/null || echo " (Unable to show progression - possibly shallow clone)" changed=1 success_body+="$submodule_name: ✅ PR branch is ahead of ${{ inputs.base_ref }} branch (fast-forward)"$'\n' - + # Case 3: Check if current commit is an ancestor of target commit (current is behind) elif git merge-base --is-ancestor "$current_commit" "$target_commit" 2>/dev/null; then echo "❌ $submodule_name: PR branch is BEHIND ${{ inputs.base_ref }} branch" @@ -138,11 +139,12 @@ jobs: failed_body+=" TARGET (${{ inputs.base_ref }} branch): $github_repo/commits/$target_commit/"$'\n' failed_body+=" CURRENT (PR #${{ inputs.pr_number }} from ${{ inputs.head_ref }}): $github_repo/commits/$current_commit/"$'\n\n' fi - + else # Case 4: Commits have diverged or have no common ancestor + echo "Checking if commits have diverged or have no common ancestor" common_ancestor=$(git merge-base "$target_commit" "$current_commit" 2>/dev/null) - + if [ -n "$common_ancestor" ]; then echo "❌ $submodule_name: Commits have DIVERGED from a common ancestor" echo " This indicates parallel development - manual merge may be required" @@ -192,9 +194,9 @@ jobs: fi fi cd "$GITHUB_WORKSPACE" - + done < <(git submodule status) - + # 
Set outputs echo "failed=$failed" >> $GITHUB_OUTPUT echo "changed=$changed" >> $GITHUB_OUTPUT @@ -212,42 +214,47 @@ jobs: echo "$comment_body" >> $GITHUB_OUTPUT echo "EOF" >> $GITHUB_OUTPUT fi - + if [[ $failed -eq 1 ]]; then echo "" echo "❌ One or more submodules are not fast-forwarded" echo "Please ensure submodule commits are fast-forwards of the ${{ inputs.base_ref }} branch" exit 1 fi - + echo "" echo "✅ All submodules are properly fast-forwarded" - comment: - name: Comment on PR + create_artifact: + name: Create Comment Artifact needs: [check] runs-on: ubuntu-latest if: always() && needs.check.outputs.changed == '1' steps: - - name: Comment on PR + - name: Create comment file uses: actions/github-script@v7 with: script: | + const fs = require('fs'); + const failed = '${{ needs.check.outputs.failed }}' === '1'; - const title = failed ? - '## ❌ Submodule Fast-Forward Check Failed' : + const title = failed ? + '## ❌ Submodule Fast-Forward Check Failed' : '## ✅ Submodule Fast-Forward Check Results'; - + const commentBody = `${title} - + **Check based on commit:** ${{ inputs.head_sha }} (PR #${{ inputs.pr_number }} from \`${{ inputs.head_ref }}\`) - + ${{ needs.check.outputs.comment_body }} ${failed ? 'Please ensure all submodule commits are fast-forwards of the ${{ inputs.base_ref }} branch before merging.' : 'All submodule changes look good! 
✨'}`;
-
-          await github.rest.issues.createComment({
-            issue_number: ${{ inputs.pr_number }},
-            owner: context.repo.owner,
-            repo: context.repo.repo,
-            body: commentBody
-          });
\ No newline at end of file
+
+            fs.writeFileSync('comment.txt', commentBody);
+            console.log('Comment body written to comment.txt');
+
+      - name: Upload comment artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: submodule-check
+          path: comment.txt
+          retention-days: 1
diff --git a/.github/workflows/automodel-submodule-checks.yml b/.github/workflows/automodel-submodule-checks.yml
new file mode 100644
index 0000000000..8694030f26
--- /dev/null
+++ b/.github/workflows/automodule-submodule-checks.yml
@@ -0,0 +1,38 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: "Automodel Integration and Submodule Checks"
+
+on:
+  pull_request:
+
+jobs:
+
+  submodule-check:
+    name: Check submodule fast-forward
+    uses: ./.github/workflows/_submodule_check.yml
+    with:
+      base_ref: ${{ github.base_ref }}
+      head_ref: ${{ github.head_ref }}
+      pr_number: ${{ github.event.number }}
+      head_sha: ${{ github.event.pull_request.head.sha }}
+
+  automodel-integration-check:
+    name: Check if changes in nemo-automodel are in sync with nemo-rl and vice versa
+    uses: ./.github/workflows/_automodel_integration_check.yml
+    with:
+      base_ref: ${{ github.base_ref }}
+      head_ref: ${{ github.head_ref }}
+      pr_number: ${{ github.event.number }}
+      head_sha: ${{ github.event.pull_request.head.sha }}
diff --git a/.github/workflows/build-test-publish-wheel.yml b/.github/workflows/build-test-publish-wheel.yml
index 37025e0e62..b39719417b 100644
--- a/.github/workflows/build-test-publish-wheel.yml
+++ b/.github/workflows/build-test-publish-wheel.yml
@@ -27,6 +27,7 @@ defaults:
 jobs:
   build-test-publish-wheel:
     uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_test_publish_wheel.yml@v0.33.0
+    if: ${{ vars.BUILD_TEST_PUBLISH_WHEEL == 'true' }}
     with:
       dry-run: true
       python-package: nemo_rl
diff --git a/.github/workflows/cherry-pick-release-commit.yml b/.github/workflows/cherry-pick-release-commit.yml
index 0fc1da8001..d891018417 100644
--- a/.github/workflows/cherry-pick-release-commit.yml
+++ b/.github/workflows/cherry-pick-release-commit.yml
@@ -20,7 +20,7 @@ on:
 jobs:
   cherry-pick:
-    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.31.0
+    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_cherry_pick.yml@v0.63.0
    secrets:
      PAT: ${{ secrets.PAT }}
      SLACK_WEBHOOK_ADMIN: ${{
secrets.SLACK_WEBHOOK_ADMIN }}
diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
new file mode 100644
index 0000000000..07f1a1bb24
--- /dev/null
+++ b/.github/workflows/cicd-main.yml
@@ -0,0 +1,430 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: "CICD NeMo RL"
+
+on:
+  pull_request:
+    branches:
+      - "main"
+      - "r**"
+    types: [labeled, opened, synchronize, reopened]
+  merge_group:
+    types: [checks_requested]
+  schedule:
+    - cron: "0 9 * * *"
+  workflow_dispatch:
+    inputs:
+      test_to_run:
+        required: false
+        default: L2
+        type: choice
+        options:
+          - docs
+          - L0
+          - L1
+          - L2
+        description: Test level to run. docs = doc tests only, L0 = unit/docs/lint, L1 = L0 + functional, L2 = L1 + convergence
+  # TODO: Due to limited compute, disabling pushes to main.
This is okay to do since we force PRs to be up to date and the CI tests on pull/$PR_NUM/merge + #push: + # branches: + # - 'main' + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }} + cancel-in-progress: true + +jobs: + pre-flight: + runs-on: ubuntu-latest + outputs: + test_level: ${{ steps.evaluate.outputs.test_level }} + steps: + - name: Get changed files + id: changed-files + if: github.event_name == 'pull_request' + uses: step-security/changed-files@v45.0.1 + with: + files_yaml: | + doc: + - '**.md' + - docs/** + src: + - '!**.md' + - '!docs/**' + + - name: Evaluate conditions + id: evaluate + env: + DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }} + CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }} + CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }} + IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }} + LABEL: ${{ github.event.label.name }} + MERGE_GROUP: ${{ github.event_name == 'merge_group' }} + run: | + # Some output that's helpful for debugging + echo "Docs changed: $CHANGED_DOCS" + echo "Src changed: $CHANGED_SRC" + echo "LABEL: $LABEL" + echo "IS_PULLREQUEST: $IS_PULLREQUEST" + echo "DOCS_ONLY: $DOCS_ONLY" + + # Run CI only (on main or if label is attached) and if it's not only docs + # Determine test level based on conditions + if [[ "$DOCS_ONLY" == "true" || "$LABEL" == "CI:docs" ]]; then + # For doc-only changes, run only doc tests + TEST_LEVEL="docs" + elif [[ "$LABEL" == "CI:L0" ]]; then + TEST_LEVEL="L0" + elif [[ "$LABEL" == "CI:L1" || "$IS_PULLREQUEST" == "false" || "$MERGE_GROUP" == "true" ]]; then + # For labeled PRs, pushes to main (IS_PULL_REQUEST=false), or merge group events, run L1 by default + TEST_LEVEL="L1" + elif [[ "$LABEL" == "CI:L2" ]]; then + TEST_LEVEL="L2" + else + # Skip tests by default for non-labeled PRs + 
TEST_LEVEL="none" + fi + + if [[ "${{ github.event_name }}" == "schedule" ]]; then + echo "Setting test level to L1 for nightly scheduled run" + TEST_LEVEL="L1" + fi + + # Override test level if specified in workflow_dispatch + if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + echo "Overriding test level from $TEST_LEVEL to ${{ inputs.test_to_run }}" + TEST_LEVEL="${{ inputs.test_to_run }}" + fi + + echo "test_level=$TEST_LEVEL" | tee -a "$GITHUB_OUTPUT" + + pr-branch-up-to-date-check: + name: Check if PR branch is up to date + needs: [pre-flight] + if: ${{ github.event_name == 'pull_request' }} + runs-on: ubuntu-latest + env: + MAX_COMMITS_BEHIND: 10 + steps: + - name: Check how many commits behind target branch + env: + GH_TOKEN: ${{ github.token }} + REPO: ${{ github.repository }} + BASE_SHA: ${{ github.event.pull_request.base.sha }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + BASE_REF: ${{ github.base_ref }} + HEAD_LABEL: ${{ github.event.pull_request.head.label }} + run: | + echo "Repository: $REPO" + echo "Base branch: $BASE_REF (SHA: $BASE_SHA)" + echo "PR head: $HEAD_LABEL (SHA: $HEAD_SHA)" + echo "Maximum commits behind allowed: $MAX_COMMITS_BEHIND" + + API_RESPONSE=$(gh api "repos/$REPO/compare/$HEAD_SHA...$BASE_REF" --jq '{behind_by: .behind_by, ahead_by: .ahead_by, status: .status}') + + COMMITS_BEHIND=$(echo "$API_RESPONSE" | jq -r '.ahead_by') + COMMITS_AHEAD=$(echo "$API_RESPONSE" | jq -r '.behind_by') + STATUS=$(echo "$API_RESPONSE" | jq -r '.status') + + echo "Comparison status: $STATUS" + echo "PR is $COMMITS_BEHIND commits behind and $COMMITS_AHEAD commits ahead of $BASE_REF" + + # Check if we're behind by more than the allowed number + if [ "$COMMITS_BEHIND" -gt "$MAX_COMMITS_BEHIND" ]; then + echo "❌ ERROR: This PR is $COMMITS_BEHIND commits behind $BASE_REF, which exceeds the maximum allowed ($MAX_COMMITS_BEHIND commits)." + echo "Please rebase or merge the latest changes from $BASE_REF into your PR branch." 
+ exit 1 + else + echo "✅ PR is acceptably fresh ($COMMITS_BEHIND commits behind, limit is $MAX_COMMITS_BEHIND)" + fi + + lint-check: + name: Lint check + needs: [pre-flight] + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + submodules: 'recursive' + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "0.9.1" + enable-cache: true + prune-cache: false + # Faster than uv python install since it caches python alongside runner + - name: "Set up Python" + uses: actions/setup-python@v5 + with: + python-version-file: ".python-version" + - name: Check lint + run: | + uv venv + uv run --group dev pre-commit install + uv run --group dev pre-commit run --all-files --show-diff-on-failure --color=always + # TODO: this is a temporary check and should be removed once we have 100% correctness + - name: Check if any files with zero errors not in whitelist + run: | + missing_count=0 + for file in $(uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . as $file | if ($error_counts[$file] // 0) == 0 then $file else empty end'); do + if ! fgrep -q "$file" pyrefly.toml; then + echo "File $file has zero errors but is not in pyrefly.toml in the 'project-includes' list. Please add it to this whitelist." 
+ ((missing_count++)) + fi + done + + exit $missing_count + - name: Minimize uv cache + run: uv cache prune --ci + + sphinx-build: + needs: [pre-flight] + if: ${{ needs.pre-flight.outputs.test_level != 'none' }} + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_docs.yml@v0.57.0 + + build-container: + if: ${{ needs.pre-flight.outputs.test_level != 'none' }} + needs: [pre-flight] + uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_build_container.yml@v0.52.0 + with: + build-ref: ${{ github.sha }} + image-name: nemo_rl_container + dockerfile: docker/Dockerfile + image-label: nemo-rl + target: hermetic + build-contexts: | + nemo-rl=${{ github.run_id }}/ + build-args: | + MAX_JOBS=32 + NEMO_RL_COMMIT=${{ github.sha }} + + cicd-doc-tests: + strategy: + fail-fast: false + matrix: + include: + - script: Docs_Tests + runner: self-hosted-azure + needs: [pre-flight, build-container] + if: ${{ contains('docs L0 L1 L2', needs.pre-flight.outputs.test_level) }} + runs-on: ${{ matrix.runner }} + name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }} + environment: nemo-ci + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: main + uses: ./.github/actions/test-template + with: + runner: ${{ runner.name }} + script: ${{ matrix.script }} + is_doc_test: "true" + is_fork_pr: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name }} + + cicd-unit-tests: + strategy: + fail-fast: false + matrix: + include: + - script: L0_Unit_Tests_Generation + runner: self-hosted-azure + - script: L0_Unit_Tests_Policy + runner: self-hosted-azure + - script: L0_Unit_Tests_Other + runner: self-hosted-azure + needs: [pre-flight, build-container, cicd-doc-tests] + if: ${{ contains('L0 L1 L2', needs.pre-flight.outputs.test_level) }} + runs-on: ${{ matrix.runner }} + name: ${{ matrix.script }} + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: main + uses: 
./.github/actions/test-template + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + with: + runner: ${{ runner.name }} + script: ${{ matrix.script }} + is_unit_test: "true" + cpu-only: ${{ matrix.cpu-only || false }} + is_fork_pr: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name }} + + cicd-functional-tests: + strategy: + fail-fast: false + matrix: + include: + - script: L1_Functional_Tests_GPU + runner: self-hosted-azure + needs: [pre-flight, build-container, cicd-unit-tests] + runs-on: ${{ matrix.runner }} + if: ${{ contains('L1 L2', needs.pre-flight.outputs.test_level) }} + name: ${{ matrix.is_optional && 'PLEASEFIXME_' || '' }}${{ matrix.script }} + environment: nemo-ci + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: main + uses: ./.github/actions/test-template + env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} + with: + runner: ${{ runner.name }} + script: ${{ matrix.script }} + is_fork_pr: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name != github.event.pull_request.base.repo.full_name }} + + CI_QA_Gate: + name: "CI quality check${{ needs.pre-flight.outputs.test_level == 'none' && ' (No tests run: Label CI:L*)' || '' }}" + if: always() + runs-on: ubuntu-latest + needs: + - pre-flight + - pr-branch-up-to-date-check + - lint-check + - sphinx-build + - build-container + - cicd-doc-tests + - cicd-unit-tests + - cicd-functional-tests + steps: + - name: main + env: + JOB_RESULTS: ${{ toJSON(needs) }} + # Job is considered successful if nothing was run, or if all jobs were successful (the tests run even if only docs were run b/c doctests are selected) + ALL_SUCCESS: >- + ${{ + needs.lint-check.result == 'success' && + (needs.pr-branch-up-to-date-check.result == 'success' || needs.pr-branch-up-to-date-check.result == 'skipped') && + ( + needs.pre-flight.outputs.test_level != 'none' && + needs.sphinx-build.result == 'success' && + 
needs.build-container.result == 'success' && + ( + ( + needs.cicd-doc-tests.result == 'success' && + (needs.cicd-unit-tests.result == 'skipped' || needs.cicd-unit-tests.result == 'success') && + (needs.cicd-functional-tests.result == 'skipped' || needs.cicd-functional-tests.result == 'success') + ) + ) + ) + }} + CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }} + TEST_LEVEL: ${{ needs.pre-flight.outputs.test_level }} + run: | + SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"') + echo '🤖: CICD Result for test level: ${{ needs.pre-flight.outputs.test_level }}' >> $GITHUB_STEP_SUMMARY + echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY + test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true" + + notify-nightly-failure: + name: Notify nightly test failure + runs-on: ubuntu-latest + needs: [CI_QA_Gate] + environment: main + if: ${{ always() && github.event_name == 'schedule' && needs.CI_QA_Gate.result == 'failure' }} + steps: + - name: Send Slack notification + env: + SLACK_WEBHOOK: ${{ secrets.SLACK_TEAM_CHANNEL_WEBHOOK }} + run: | + MESSAGE='{ + "blocks": [ + { + "type": "section", + "text": { + "type": "mrkdwn", + "text": "🚨 Nightly GitHub CI test failed on main branch\n\n• Repository: ${{ github.repository }}\n• Commit: `${{ github.sha }}`\n• Workflow: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Run>" + } + } + ] + }' + + curl -X POST -H "Content-type: application/json" --data "$MESSAGE" "$SLACK_WEBHOOK" + + Coverage: + runs-on: ubuntu-latest + needs: + - CI_QA_Gate + - cicd-doc-tests + - cicd-unit-tests + - cicd-functional-tests + if: always() + strategy: + matrix: + flag: [doc-test, unit-test, e2e] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Download coverage reports of current branch + uses: actions/download-artifact@v4 + with: + pattern: coverage-${{ matrix.flag }}-* + + - name: Check if artifacts were downloaded + id: check-artifacts + run: | + # Check 
if any coverage directories were downloaded + if ls coverage-* 1> /dev/null 2>&1; then + echo "artifacts-found=true" >> $GITHUB_OUTPUT + echo "Found coverage artifacts for ${{ matrix.flag }}" + else + echo "artifacts-found=false" >> $GITHUB_OUTPUT + echo "No coverage artifacts found for ${{ matrix.flag }}" + fi + + - name: Get total coverage of current branch + shell: bash -x -e -u -o pipefail {0} + if: ${{ steps.check-artifacts.outputs.artifacts-found == 'true' }} + run: | + pip install coverage + + ls -al . + ls -al coverage-*/ + coverage combine --keep $(ls coverage-*/.coverage) + coverage report -i --show-missing + rm -rf coverage-* + ls -al + + - name: Skip coverage processing + if: ${{ steps.check-artifacts.outputs.artifacts-found == 'false' }} + run: | + echo "No coverage artifacts found for ${{ matrix.flag }}, skipping coverage processing" + + - name: Upload coverage reports to Codecov + if: ${{ steps.check-artifacts.outputs.artifacts-found == 'true' }} + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + flags: ${{ matrix.flag }} + + - name: Upload artifacts + if: ${{ steps.check-artifacts.outputs.artifacts-found == 'true' }} + uses: actions/upload-artifact@v4 + with: + name: coverage-${{ matrix.flag }}-aggregated + path: | + .coverage + include-hidden-files: true + + DCO_merge_group: + name: DCO + if: github.event_name == 'merge_group' + runs-on: ubuntu-latest + steps: + - run: echo "The real DCO check happens on PRs only. This is a placeholder for the merge queue to keep the DCO check as a required status check." 
diff --git a/.github/workflows/community-bot.yml b/.github/workflows/community-bot.yml
index bf314b7e8d..fa004e2822 100644
--- a/.github/workflows/community-bot.yml
+++ b/.github/workflows/community-bot.yml
@@ -8,6 +8,8 @@ on:
 jobs:
   community-bot:
-    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.49.1
+    uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/_community_bot.yml@v0.54.4
+    with:
+      community_project_id: ${{ vars.COMMUNITY_PROJECT_ID }}
     secrets:
       GH_TOKEN: ${{ secrets.PAT }}
diff --git a/.github/workflows/healthcheck_vms.yml b/.github/workflows/healthcheck_vms.yml
new file mode 100644
index 0000000000..40a5bc2d19
--- /dev/null
+++ b/.github/workflows/healthcheck_vms.yml
@@ -0,0 +1,145 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+name: VM Health Check and Reboot +on: + schedule: + - cron: '0 7 * * *' + workflow_dispatch: + +jobs: + pre-flight: + runs-on: ubuntu-latest + outputs: + list-of-vms: ${{ steps.main.outputs.main }} + environment: main + steps: + - name: Get list of VMs + id: main + env: + GITHUB_TOKEN: ${{ secrets.PAT }} + run: | + RUNNERS=$(curl -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer $GITHUB_TOKEN" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + ${{ github.api_url }}/repos/${{ github.repository }}/actions/runners) + + MATRIX=$(echo $RUNNERS \ + | jq -c '[ + .runners[] + | select(.status == "online") + | select(.name | contains("cpu") | not) + | { + "vm": .name + } + ] + ' + ) + echo main=$MATRIX | tee -a "$GITHUB_OUTPUT" + + healthcheck: + needs: pre-flight + strategy: + fail-fast: false + matrix: + include: ${{ fromJSON(needs.pre-flight.outputs.list-of-vms )}} + uses: ./.github/workflows/_healthcheck_vm.yml + with: + vm: ${{ matrix.vm }} + n_gpus: "2" + secrets: + SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }} + SLACK_GITHUB_CI_WEBHOOK: ${{ secrets.SLACK_GITHUB_CI_WEBHOOK }} + VM_KEY: ${{ secrets.VM_KEY }} + PAT: ${{ secrets.PAT }} + + check-reboots-needed: + needs: [pre-flight, healthcheck] + if: ${{ always() }} + runs-on: ubuntu-latest + outputs: + has_reboots: ${{ steps.check-artifacts.outputs.has_reboots }} + steps: + - name: Download all healthcheck artifacts + uses: actions/download-artifact@v4 + with: + pattern: healthcheck-* + path: ./healthcheck-results/ + merge-multiple: true + + - name: Check if any VMs needed reboots + id: check-artifacts + env: + VM_LIST: ${{ needs.pre-flight.outputs.list-of-vms }} + run: | + echo "Checking healthcheck artifacts for reboot status..." 
+ HAS_REBOOTS=false + + # Create a list of VMs to check + VM_NAMES=$(echo "$VM_LIST" | jq -r '.[] | .vm') + + # Check each VM's artifact + for VM in $VM_NAMES; do + echo "Checking reboot status for VM: $VM" + + REBOOT_FILE="./healthcheck-results/${VM}-reboot-needed.txt" + if [[ -f "$REBOOT_FILE" ]]; then + REBOOT_NEEDED=$(cat "$REBOOT_FILE") + echo "VM $VM reboot needed: $REBOOT_NEEDED" + + if [[ "$REBOOT_NEEDED" == "true" ]]; then + echo "VM $VM needs/needed a reboot" + HAS_REBOOTS=true + fi + else + echo "WARNING: No artifact found for VM $VM" + fi + done + + if [[ "$HAS_REBOOTS" == "true" ]]; then + echo "At least one VM was rebooted" + echo "has_reboots=true" >> "$GITHUB_OUTPUT" + else + echo "No VMs were rebooted" + echo "has_reboots=false" >> "$GITHUB_OUTPUT" + fi + + wait-for-reboot: + needs: check-reboots-needed + if: ${{ needs.check-reboots-needed.outputs.has_reboots == 'true' }} + runs-on: ubuntu-latest + steps: + - name: Wait for VMs to come back online + run: | + WAIT_MINUTES=3 + echo "Waiting ${WAIT_MINUTES} minutes for rebooted VMs to come back online..." 
+          sleep $((WAIT_MINUTES * 60))
+
+  recheck:
+    needs: [pre-flight, wait-for-reboot]
+    if: ${{ always() && needs.wait-for-reboot.result == 'success' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include: ${{ fromJSON(needs.pre-flight.outputs.list-of-vms) }}
+    uses: ./.github/workflows/_healthcheck_vm.yml
+    with:
+      vm: ${{ matrix.vm }}
+      n_gpus: "2"
+      is_recheck: true
+    secrets:
+      SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
+      SLACK_GITHUB_CI_WEBHOOK: ${{ secrets.SLACK_GITHUB_CI_WEBHOOK }}
+      VM_KEY: ${{ secrets.VM_KEY }}
+      PAT: ${{ secrets.PAT }}
diff --git a/.github/workflows/merge-queue-retry.yml b/.github/workflows/merge-queue-retry.yml
new file mode 100644
index 0000000000..872bc43351
--- /dev/null
+++ b/.github/workflows/merge-queue-retry.yml
@@ -0,0 +1,119 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +name: "Merge Queue Auto-Retry" + +on: + pull_request: + types: + - dequeued + +jobs: + requeue-pr: + runs-on: ubuntu-latest + steps: + - name: Generate GitHub App token + id: generate_token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ vars.BOT_ID }} + private-key: ${{ secrets.BOT_KEY }} + + - name: Check dequeue reason and retry count + id: check_retry + if: github.event.reason == 'CI_TIMEOUT' + env: + GH_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + + # Debug: Show all comments first + echo "=== All PR Comments ===" + gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + --jq '.[] | {id: .id, created_at: .created_at, body: .body[:100]}' + + echo "=== Filtering for retry comments ===" + + # Get the current number of retry attempts from PR comments + RETRY_COUNT=$(gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + --jq '[.[] | select(.body | contains("Auto-retry attempt")) | .body] | length') + + echo "Current retry count: $RETRY_COUNT" + + MAX_RETRIES=3 + + if [ "$RETRY_COUNT" -lt "$MAX_RETRIES" ]; then + echo "should_retry=true" >> $GITHUB_OUTPUT + echo "retry_count=$((RETRY_COUNT + 1))" >> $GITHUB_OUTPUT + echo "✅ Will retry (attempt $((RETRY_COUNT + 1))/$MAX_RETRIES)" + else + echo "should_retry=false" >> $GITHUB_OUTPUT + echo "❌ Max retries ($MAX_RETRIES) reached for PR #${PR_NUMBER}" + fi + + - name: Add retry comment + if: steps.check_retry.outputs.should_retry == 'true' + env: + GH_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + RETRY_COUNT=${{ steps.check_retry.outputs.retry_count }} + + gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + -f body="🔄 Auto-retry attempt ${RETRY_COUNT}: PR was removed from merge queue, automatically requeuing..." 
+ + - name: Requeue Pull Request + if: steps.check_retry.outputs.should_retry == 'true' + env: + GH_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + PR_NODE_ID="${{ github.event.pull_request.node_id }}" + + echo "Requeuing PR #${PR_NUMBER}..." + + # First, try using GraphQL API to enqueue the PR directly + GRAPHQL_RESPONSE=$(curl -s -X POST \ + -H "Authorization: Bearer ${{ steps.generate_token.outputs.token }}" \ + -H "Content-Type: application/json" \ + -d "{\"query\": \"mutation { enqueuePullRequest(input: {pullRequestId: \\\"${PR_NODE_ID}\\\"}) { clientMutationId } }\"}" \ + https://api.github.com/graphql) + + if echo "$GRAPHQL_RESPONSE" | jq -e '.data.enqueuePullRequest' > /dev/null; then + echo "PR #${PR_NUMBER} has been successfully requeued" + else + echo "❌ Failed to enqueue PR #${PR_NUMBER}. GraphQL response for debugging:" + echo "$GRAPHQL_RESPONSE" + exit 1 + fi + + - name: Max retries reached comment + if: steps.check_retry.outputs.should_retry == 'false' + env: + GH_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + + gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + -f body="⚠️ Maximum auto-retry attempts reached. PR was removed from merge queue multiple times. Please investigate the issue and manually requeue if needed." + + - name: Notify on failure + if: failure() + env: + GH_TOKEN: ${{ steps.generate_token.outputs.token }} + run: | + PR_NUMBER=${{ github.event.pull_request.number }} + + gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ + -f body="❌ Auto-retry failed due to an error in the workflow. Please manually requeue the PR." 
diff --git a/.github/workflows/pr-checks-comment.yml b/.github/workflows/pr-checks-comment.yml new file mode 100644 index 0000000000..8f38d6375d --- /dev/null +++ b/.github/workflows/pr-checks-comment.yml @@ -0,0 +1,40 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: "Automodel Integration and Submodule Checks" + +on: + pull_request_target: + +jobs: + + submodule-check-comment: + name: Post submodule check comment + uses: ./.github/workflows/_pr_comment.yml + with: + pr_number: "${{ github.event.number }}" + artifact_name: submodule-check + head_sha: "${{ github.event.pull_request.head.sha }}" + max_retries: 30 + sleep_duration: 10 + + automodel-integration-comment: + name: Post automodel integration comment + uses: ./.github/workflows/_pr_comment.yml + with: + pr_number: "${{ github.event.number }}" + artifact_name: automodel-integration-check + head_sha: "${{ github.event.pull_request.head.sha }}" + max_retries: 30 + sleep_duration: 10 diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml similarity index 100% rename from .github/workflows/release-freeze.yml rename to .github/workflows/release-freeze.yml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml similarity index 100% rename from .github/workflows/release.yaml rename to .github/workflows/release.yaml diff --git a/.github/workflows/semantic-pull-request.yml 
b/.github/workflows/semantic-pull-request.yml similarity index 100% rename from .github/workflows/semantic-pull-request.yml rename to .github/workflows/semantic-pull-request.yml diff --git a/nemo_rl/__init__.py b/nemo_rl/__init__.py index b94f78916d..9217b6a580 100644 --- a/nemo_rl/__init__.py +++ b/nemo_rl/__init__.py @@ -46,6 +46,7 @@ ) os.environ["RAY_USAGE_STATS_ENABLED"] = "0" +os.environ["RAY_ENABLE_UV_RUN_RUNTIME_ENV"] = "0" def _patch_nsight_file(): @@ -113,3 +114,25 @@ def _patch_nsight_file(): # Apply the patch _patch_nsight_file() + + +# Need to set PYTHONPATH to include transformers downloaded modules. +# Assuming the cache directory is the same cross venvs. +def patch_transformers_module_dir(env_vars: dict[str, str]): + hf_home = os.environ.get("HF_HOME", None) + if hf_home is None: + return env_vars + + module_dir = os.path.join(hf_home, "modules") + if not os.path.isdir(module_dir): + return env_vars + + if "PYTHONPATH" not in env_vars: + env_vars["PYTHONPATH"] = module_dir + else: + env_vars["PYTHONPATH"] = f"{module_dir}:{env_vars['PYTHONPATH']}" + + return env_vars + + +patch_transformers_module_dir(os.environ) diff --git a/nemo_rl/algorithms/async_utils.py b/nemo_rl/algorithms/async_utils.py new file mode 100644 index 0000000000..c1ce9ab762 --- /dev/null +++ b/nemo_rl/algorithms/async_utils.py @@ -0,0 +1,730 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import threading as _threading +import time +from typing import Any, Optional + +import ray +from torchdata.stateful_dataloader import StatefulDataLoader +from transformers import PreTrainedTokenizerBase + +from nemo_rl.algorithms.grpo import MasterConfig +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.experience.rollouts import ( + run_async_multi_turn_rollout, +) +from nemo_rl.models.generation.interfaces import GenerationInterface + +TokenizerType = PreTrainedTokenizerBase + + +@ray.remote # pragma: no cover +class ReplayBuffer: + """Replay buffer storing per-prompt groups. + + A single entry corresponds to 1 prompt repeated by + grpo.num_generations_per_prompt (required to compute per-prompt advantages). + """ + + def __init__(self, max_size: int): + if max_size <= 0: + raise ValueError(f"max_size must be positive, got {max_size}") + self.max_size = max_size + self.trajectories = [] # List[dict[str, Any]] + # If trajectory_version is 1 and target_weight_version is 4 it means that weight version 1 was used for generating a trajectory and this trajectory will be used for training when weight version is 4. + self.trajectory_versions = [] # it is the weight-version used for generation of a trajectory + self.target_weight_versions = [] # it is the weight-version of the trainer where this trajectory will be used. + + self.last_target_weight_already_generated = -1 + self._lock = _threading.Lock() + + def push_with_wait_signal( + self, + trajectory: dict[str, Any], + weight_version: int, + target_weight_version: int, + ) -> str: + """Add a per-prompt trajectory group with metadata. 
+ + Args: + trajectory: data dict + weight_version: version of the model weights used for generation + target_weight_version: version of the model weights this trajectory is intended for training + """ + with self._lock: + if len(self.trajectories) >= self.max_size: + return "full" + + print("🔍 ReplayBuffer.push_with_wait_signal: Adding trajectory") + self.trajectories.append(trajectory) + self.trajectory_versions.append(weight_version) + self.target_weight_versions.append(target_weight_version) + self.last_target_weight_already_generated = max( + self.last_target_weight_already_generated, target_weight_version + ) + print( + f"ReplayBuffer state: {len(self.trajectories)} groups, versions={self.trajectory_versions}, targets={self.target_weight_versions}, last_target_weight_already_generated={self.last_target_weight_already_generated}" + ) + return "success" + + def get_debug_info(self) -> dict: + """Get debug information about buffer state.""" + return { + "total_trajectories": len(self.trajectories), + "trajectory_versions": self.trajectory_versions, + "target_weight_versions": self.target_weight_versions, + "max_size": self.max_size, + } + + def get_last_target_weight_already_generated(self) -> int: + with self._lock: + return self.last_target_weight_already_generated + + def get_existing_target_weights(self) -> set[int]: + """Get set of target weight versions that already have trajectories.""" + with self._lock: + return set(self.target_weight_versions) + + def sample( + self, + num_prompt_groups: int, + current_weight_version: int, + max_age_steps: int, + ) -> Optional[dict[str, Any]]: + """Sample per-prompt trajectory groups intended for the current training step. + + Only returns trajectories with target_weight_version == current_weight_version. + If insufficient trajectories are available, returns None to stall training + until the remaining trajectories are generated. 
This ensures no trajectory + loses its last chance to be used for its intended training step. + + Returns: + Dictionary with 'trajectories' and 'avg_trajectory_age' keys, or None if insufficient data + """ + with self._lock: + if not self.trajectories: + return None + + total_trajectories = len(self.trajectories) + print("🔍 ReplayBuffer sampling debug:") + print(f" {current_weight_version=}, {max_age_steps=}") + print(f" {self.trajectory_versions=}") + + # For debugging: check for unexpected old trajectories + from collections import Counter + + version_counts = Counter(self.trajectory_versions) + print(f" {version_counts=}") + + # Compute minimum valid version based on age window + # max_age_steps=1 means trajectories from the last 1 step are valid + min_valid_version = max(0, current_weight_version - max_age_steps) + print(f" {min_valid_version=}") + + # Check for unexpected old trajectories + old_trajectories = [ + v for v in self.trajectory_versions if v < min_valid_version + ] + if old_trajectories: + raise ValueError( + f"Found {len(old_trajectories)} trajectories older than min_valid_version {min_valid_version}" + ) + + # Filter for valid trajectories without modifying the buffer + valid_indices = [ + i + for i, v in enumerate(self.trajectory_versions) + if min_valid_version <= v <= current_weight_version + ] + print( + f" valid_indices: {len(valid_indices)}/{total_trajectories} trajectories within age window" + ) + if not valid_indices: + print("No trajectories available for sampling.") + return None + + # Enforce exact number of groups if available; otherwise, signal to wait + if len(valid_indices) < num_prompt_groups: + print( + f"Insufficient valid groups: have {len(valid_indices)}, need {num_prompt_groups}. Waiting for buffer to fill." 
+ ) + return None + + # Only select trajectories intended for the current training step + # This ensures no trajectory loses its "last chance" to be used for its intended step + intended_indices = [ + i + for i in valid_indices + if self.target_weight_versions[i] == current_weight_version + ] + + print( + f" 🎯 Found {len(intended_indices)} trajectories intended for current step {current_weight_version}" + ) + + # Stall training if we don't have enough trajectories intended for this step + if len(intended_indices) < num_prompt_groups: + print( + f" ⏸️ STALLING: Need {num_prompt_groups} trajectories for step {current_weight_version}, but only {len(intended_indices)} are ready" + ) + print( + f" ⏸️ Training will wait for remaining {num_prompt_groups - len(intended_indices)} trajectories to be generated" + ) + return None + + # Select exactly the trajectories intended for this step (FIFO within same target) + selected: list[int] = intended_indices[:num_prompt_groups] + print( + f" ✅ Selected {len(selected)} trajectories all intended for step {current_weight_version}" + ) + + from collections import Counter + + sampled_weights = [self.trajectory_versions[i] for i in selected] + avg_trajectory_age = current_weight_version - sum(sampled_weights) / len( + sampled_weights + ) + print( + f"✅ Selected counts by generation weight-version: {Counter(sampled_weights)}" + ) + print(f"📊 Average trajectory age: {avg_trajectory_age:.2f} steps") + print( + f"🎯 All selected trajectories target step {current_weight_version} (100% target match)" + ) + + sampled_items = [self.trajectories[i] for i in selected] + + # Remove selected items in reverse order to maintain correct indices + for idx in sorted(selected, reverse=True): + self.trajectory_versions.pop(idx) + self.target_weight_versions.pop(idx) + self.trajectories.pop(idx) + print( + f"🗑️ Consumed and removed {len(selected)} groups from buffer, old buffer size: {total_trajectories}, new buffer size: {len(self.trajectories)}, new 
target weight versions {self.target_weight_versions}" + ) + + return { + "trajectories": sampled_items, + "avg_trajectory_age": avg_trajectory_age, + } + + def size(self) -> int: + """Return current buffer size.""" + with self._lock: + return len(self.trajectories) + + def clear(self) -> None: + """Clear the buffer.""" + with self._lock: + self.trajectories.clear() + self.trajectory_versions.clear() + self.target_weight_versions.clear() + + +@ray.remote # pragma: no cover +class AsyncTrajectoryCollector: + """Collects trajectories asynchronously and adds them to replay buffer.""" + + def __init__( + self, + policy_generation: GenerationInterface, + tokenizer: TokenizerType, + task_to_env: dict[str, EnvironmentInterface], + master_config: MasterConfig, + replay_buffer: Any, + start_step: int = 0, + ): + self.policy_generation = policy_generation + self.tokenizer = tokenizer + self.task_to_env = task_to_env + self.master_config = master_config + self.replay_buffer = replay_buffer + self.running = False + + self._pg_lock: _threading.Lock = _threading.Lock() + + # Event for manual pause/resume control + self._manual_pause_cleared = _threading.Event() + self._manual_pause_cleared.set() + + self._refit_pause_cleared = _threading.Event() + self._refit_pause_cleared.set() # Start in cleared state + + self.current_weight_version: int = start_step + self.initial_weight_version: int = start_step + + # Track when generation limits cause collection to pause + self._last_limit_warning_version = None + + # Event to signal when generation limits are cleared (more efficient than polling) + self._generation_limit_cleared = _threading.Event() + self._generation_limit_cleared.set() # Start in cleared state + + # Track threads + self._inflight_threads: set[_threading.Thread] = set() + self._threads_lock: _threading.Lock = _threading.Lock() + + # Limit in-flight generator requests to num_prompts_per_step * max_trajectory_age_steps + # This value limits the parallelism of the generation 
requests. + max_inflight = ( + int(self.master_config["grpo"]["num_prompts_per_step"]) + * int(self.master_config["grpo"]["async_grpo"]["max_trajectory_age_steps"]) + ) or 1 + self._inflight_sema = _threading.Semaphore(max_inflight) + + # Simple lock to prevent race conditions when checking/spawning workers + self._generation_check_lock: _threading.Lock = _threading.Lock() + # Track which target weights are currently being generated (globally) + self._generating_targets: set[int] = set() + + def _calculate_target_weights(self, generation_weight_version: int) -> list[int]: + """Calculate target weight versions for given generation weight version. + + The list of versions returned enumerate the possible version a generation + server can target. These versions are looped over to see what training + step they can target. If all target versions are exhausted, this generation + server will remain idle until the next weight update. + + Example: + generation_weight_version = 10 + max_trajectory_age_steps = 4 + + Returns: + [11, 12, 13, 14] # Meaning this generation server can create trajectories for training step 11, 12, 13, 14 + """ + # Read async config strictly from grpo.async_grpo + async_cfg = self.master_config.get("grpo", {}).get("async_grpo", {}) + max_trajectory_age = async_cfg["max_trajectory_age_steps"] + if generation_weight_version == self.initial_weight_version: + return [ + i + for i in range( + self.initial_weight_version, + self.initial_weight_version + max_trajectory_age + 1, + ) + ] + + return [generation_weight_version + i for i in range(1, max_trajectory_age + 1)] + + def _get_next_target_for_generation( + self, generation_weight_version: int + ) -> Optional[int]: + """Get the next target weight that needs generation (if any).""" + target_weights = self._calculate_target_weights(generation_weight_version) + last_target_weight_already_generated = ray.get( + self.replay_buffer.get_last_target_weight_already_generated.remote() + ) + + with 
self._generation_check_lock: + for target_weight in target_weights: + if ( + target_weight > last_target_weight_already_generated + and target_weight not in self._generating_targets + ): + self._generating_targets.add(target_weight) + print(f"🎯 Reserved target weight {target_weight} for generation") + return target_weight + + return None + + def set_weight_version(self, version: int) -> None: + self.current_weight_version = version + + # Resume collection if it was paused due to generation limits + was_paused = not self._generation_limit_cleared.is_set() + if was_paused: + self._generation_limit_cleared.set() # Signal that collection can resume + print(f"🔄 Updated weight version to {version}, resuming collection") + else: + print(f"🔄 Updated weight version to {version}") + + def _should_pause_for_generation_limits(self) -> bool: + """Check if collection should be paused due to generation limits.""" + try: + target_weights = self._calculate_target_weights(self.current_weight_version) + last_target_weight_already_generated = ray.get( + self.replay_buffer.get_last_target_weight_already_generated.remote() + ) + + # Check if any target weight in our range needs generation + with self._generation_check_lock: + for target_weight in target_weights: + if ( + target_weight > last_target_weight_already_generated + and target_weight not in self._generating_targets + ): + return False # Found a target that needs generation + + print( + f"⏸️ All target weights {target_weights} already generated or in progress, pausing" + ) + return True + except Exception: + return False + + def start_collection(self, dataloader: StatefulDataLoader) -> None: + """Start collecting trajectories from dataloader.""" + self.running = True + self.dataloader = dataloader + + print("Started continuous trajectory collection") + + self.collection_thread = _threading.Thread(target=self._collection_loop) + self.collection_thread.daemon = True + self.collection_thread.start() + + print("Collection thread 
started, start_collection returning") + + def _collection_loop(self): + """Run the collection loop in background thread.""" + try: + for batch in self.dataloader: + if not self.running: + break + + # Check if manually paused and wait + if not self._manual_pause_cleared.is_set() and self.running: + self._manual_pause_cleared.wait() + + # Check if refit is in progress and wait + if not self._refit_pause_cleared.is_set() and self.running: + print("⏸️ Pausing collection for refit...") + self._refit_pause_cleared.wait() + print("▶️ Refit completed, resuming collection") + + # Check if generation limits require pausing collection + if self._should_pause_for_generation_limits() and self.running: + # Only log warning once per weight version + if self._last_limit_warning_version != self.current_weight_version: + async_cfg = self.master_config.get("grpo", {}).get( + "async_grpo", {} + ) + max_trajectory_age = async_cfg["max_trajectory_age_steps"] + target_weights = [ + self.current_weight_version + i + for i in range(max_trajectory_age) + ] + + print( + f"⏸️ Pausing collection: all target weights {target_weights} for weight version {self.current_weight_version} " + f"already exist in buffer. Waiting for weight update..." + ) + self._last_limit_warning_version = self.current_weight_version + + self._generation_limit_cleared.clear() # Clear the event to pause + + # Efficiently wait for generation limits to be cleared (no polling!) 
+ self._generation_limit_cleared.wait() + + # Double-check we're still running after being woken up + if not self.running: + break + + if not self.running: + break + + self._process_batch(batch) + + except Exception as e: + print(f"❌ Error in trajectory collection: {e}") + import traceback + + traceback.print_exc() + finally: + self.running = False + print("🛑 Trajectory collection stopped") + + def _process_batch(self, batch: BatchedDataDict[DatumSpec]) -> None: + """Process a single batch and generate for one target weight.""" + try: + generation_weight_version = self.current_weight_version + num_generations = self.master_config["grpo"]["num_generations_per_prompt"] + num_prompts = batch.size + + # Get the next target weight that needs generation + target_weight = self._get_next_target_for_generation( + generation_weight_version + ) + + if target_weight is None: + print( + f"🔄 No targets need generation for weight {generation_weight_version}" + ) + return + + print( + f"🎯 Generating for target weight {target_weight} from generation_weight_version {generation_weight_version}" + ) + + # Generate for all prompts in this batch for the target weight + for prompt_idx in range(num_prompts): + # Wait for refit to complete if in progress + if not self._refit_pause_cleared.is_set() and self.running: + with self._threads_lock: + active_threads = len(self._inflight_threads) + print( + f"⏸️ Waiting for refit to complete before starting new generation ({active_threads} threads still active)" + ) + print( + " Note: With vLLM V1 async engine, active threads can complete during weight update" + ) + self._refit_pause_cleared.wait() + + # After refit finishes if weight version has updated, reflect that in the new trajectories + generation_weight_version = self.current_weight_version + + single_prompt_batch = batch.slice(prompt_idx, prompt_idx + 1) + repeated_batch = single_prompt_batch.repeat_interleave(num_generations) + + self._inflight_sema.acquire() + worker = _threading.Thread( 
+ target=self._run_prompt_group_worker, + args=( + repeated_batch, + generation_weight_version, + target_weight, + prompt_idx, + ), + daemon=True, + ) + with self._threads_lock: + self._inflight_threads.add(worker) + worker.start() + + self._cleanup_finished_threads() + + except Exception as e: + print(f"❌ Error processing batch: {e}") + import traceback + + traceback.print_exc() + + def get_weight_version(self) -> int: + return self.current_weight_version + + def pause(self) -> None: + """Pause trajectory collection.""" + self._manual_pause_cleared.clear() # Signal collection to pause + print("Trajectory collection paused") + + def resume(self) -> None: + """Resume trajectory collection.""" + self._manual_pause_cleared.set() # Signal collection to resume + print("Trajectory collection resumed") + + def prepare_for_refit(self) -> None: + """Pause new generation starts and optionally wait for pending generations. + + For vLLM V1 async engine, leverages in-flight weight updates via collective_rpc, + allowing ongoing generations to continue with their current KV caches while + weights are updated. This significantly improves async performance. + + For non-async engines, waits for all pending generations to complete before refit. 
+ """ + start_time = time.time() + print("🔄 Preparing for refit: pausing new generations...") + + # Pause new generation starts + self._refit_pause_cleared.clear() + print("⏸️ New generation starts paused") + + # Check if we're using vLLM async engine + vllm_cfg = ( + self.master_config.get("policy", {}) + .get("generation", {}) + .get("vllm_cfg", {}) + ) + is_async_engine = vllm_cfg.get("async_engine", False) + in_flight_weight_updates = ( + self.master_config.get("grpo", {}) + .get("async_grpo", {}) + .get("in_flight_weight_updates", False) + ) + + if is_async_engine and in_flight_weight_updates: + # vLLM V1 async engine supports in-flight weight updates + # Ongoing generations will continue with their current KV caches + # New generations (after weight update) will use the updated weights + print( + "🚀 Using vLLM V1 in-flight weight update - skipping wait for pending generations" + ) + print( + f" {len(self._inflight_threads)} ongoing generations will complete with current weights" + ) + else: + # For non-async engines, wait for all pending generations to complete + print( + "⏸️ Non-async engine: waiting for all pending generations to complete..." + ) + self.wait_for_pending_generations() + + elapsed = time.time() - start_time + print(f"✅ Ready for refit (took {elapsed:.2f}s)") + + def resume_after_refit(self) -> None: + """Resume new generation starts after refit is complete.""" + print("🔄 Resuming generation starts after refit") + + # Invalidate&recompute vLLM caches after the in-flight weight updates if + # recompute_kv_cache_after_weight_updates is True (AREAL-style implementation). + # Otherwise, keep using the stale KV caches (Magistral-style implementation). 
+ async_cfg = self.master_config.get("grpo", {}).get("async_grpo", {}) + if async_cfg.get("in_flight_weight_updates", False) and async_cfg.get( + "recompute_kv_cache_after_weight_updates", False + ): + try: + print("🔄 Invalidating vLLM prefix/KV caches after weight update") + invalidated = self.policy_generation.invalidate_kv_cache() + if invalidated: + print("✅ Invalidated vLLM prefix/KV caches after weight update") + else: + print( + "⚠️ vLLM cache invalidation reported partial/unsuccessful on some workers" + ) + except Exception as e: + print(f"⚠️ Failed to invalidate vLLM caches: {e}") + + self._refit_pause_cleared.set() + + def wait_for_pending_generations(self) -> None: + """Wait for all in-flight generation threads to complete.""" + start_time = time.time() + + while True: + with self._threads_lock: + finished = {t for t in self._inflight_threads if not t.is_alive()} + for t in finished: + self._inflight_threads.remove(t) + + pending_count = len(self._inflight_threads) + + if pending_count == 0: + print("✅ All generation threads completed") + break + + elapsed = time.time() - start_time + print( + f"⏳ Waiting for {pending_count} pending generation threads... 
({elapsed:.1f}s elapsed)" + ) + time.sleep(0.5) + + def get_dataloader_state(self) -> dict: + """Get the current dataloader state for checkpointing.""" + if hasattr(self, "dataloader") and hasattr(self.dataloader, "state_dict"): + return self.dataloader.state_dict() + return {} + + def _cleanup_finished_threads(self) -> None: + with self._threads_lock: + finished = {t for t in self._inflight_threads if not t.is_alive()} + for t in finished: + self._inflight_threads.remove(t) + + def _run_prompt_group_worker( + self, + repeated_batch: BatchedDataDict[DatumSpec], + generation_weight_version: int, + target_weight_version: int, + prompt_idx: int, + ) -> None: + try: + # Run rollout for this prompt group + # Async engine supports concurrent generation; avoid locking + final_batch, rollout_metrics = run_async_multi_turn_rollout( + policy_generation=self.policy_generation, + input_batch=repeated_batch, + tokenizer=self.tokenizer, + task_to_env=self.task_to_env, + max_seq_len=self.master_config["policy"]["max_total_sequence_length"], + max_rollout_turns=self.master_config["grpo"]["max_rollout_turns"], + greedy=False, + ) + + # Move to CPU and push to buffer (avoid blocking on GC/push) + final_batch_cpu = final_batch.to("cpu") + del final_batch + + trajectory_group = { + "batch": final_batch_cpu, + "rollout_metrics": rollout_metrics, + "timestamp": time.time(), + } + + # Use exponential backoff when buffer is full + try: + backoff_delay = 0.01 + while self.running: + status = ray.get( + self.replay_buffer.push_with_wait_signal.remote( + trajectory_group, + generation_weight_version, + target_weight_version, + ) + ) + if status == "success": + print( + f"📦 Buffered per-prompt group (prompt_idx {prompt_idx}, target_weight {target_weight_version})" + ) + + # Release reservation when FIRST prompt group for this target is successfully buffered + if prompt_idx == 0: + with self._generation_check_lock: + if target_weight_version in self._generating_targets: + 
self._generating_targets.discard( + target_weight_version + ) + print( + f"🧹 Released reservation for target weight {target_weight_version} (first prompt buffered)" + ) + break + elif status == "full": + # Exponential backoff up to 0.5 second + time.sleep(min(backoff_delay, 0.5)) + backoff_delay *= 1.5 + else: + # Unexpected status, wait briefly + time.sleep(0.01) + except Exception as e: + print(f"❌ Failed to enqueue per-prompt group to buffer: {e}") + import traceback + + traceback.print_exc() + except Exception as e: + print(f"❌ Error in prompt group worker: {e}") + import traceback + + traceback.print_exc() + finally: + # Clean up reservation in case of error (if not already cleaned up) + with self._generation_check_lock: + if target_weight_version in self._generating_targets: + self._generating_targets.discard(target_weight_version) + print( + f"🧹 Emergency cleanup: Released reservation for target weight {target_weight_version}" + ) + + # Detach thread record when finished + with self._threads_lock: + current = _threading.current_thread() + if current in self._inflight_threads: + self._inflight_threads.remove(current) + try: + self._inflight_sema.release() + except Exception: + import traceback + + traceback.print_exc() diff --git a/nemo_rl/algorithms/distillation.py b/nemo_rl/algorithms/distillation.py new file mode 100644 index 0000000000..b00af9e892 --- /dev/null +++ b/nemo_rl/algorithms/distillation.py @@ -0,0 +1,1059 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +import os +import warnings +from pathlib import Path +from typing import Any, NotRequired, Optional, TypedDict, TypeVar, cast + +import numpy as np +import ray +import torch +from torchdata.stateful_dataloader import StatefulDataLoader +from transformers import AutoConfig, AutoTokenizer +from transformers.tokenization_utils_base import PreTrainedTokenizerBase + +from nemo_rl.algorithms.grpo import _should_use_async_rollouts, refit_policy_generation +from nemo_rl.algorithms.loss_functions import ( + DistillationLossConfig, + DistillationLossDataDict, + DistillationLossFn, +) +from nemo_rl.algorithms.utils import set_seed +from nemo_rl.data import DataConfig +from nemo_rl.data.collate_fn import rl_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.data.llm_message_utils import ( + batched_message_log_to_flat_message, + get_keys_from_message_log, +) +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import ( + ClusterConfig, + RayVirtualCluster, +) +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.experience.rollouts import ( + run_async_multi_turn_rollout, + run_multi_turn_rollout, +) +from nemo_rl.models.generation.interfaces import ( + GenerationInterface, +) +from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.interfaces import ColocatablePolicyInterface +from nemo_rl.models.policy.lm_policy import Policy +from nemo_rl.utils.checkpoint import CheckpointingConfig, CheckpointManager +from nemo_rl.utils.logger import ( + Logger, + LoggerConfig, + print_message_log_samples, +) +from nemo_rl.utils.nsys import maybe_gpu_profile_step +from nemo_rl.utils.timer import TimeoutChecker, Timer + +# 
=============================================================================== +# Configuration +# =============================================================================== +TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) + + +class DistillationConfig(TypedDict): + # Training configuration + num_prompts_per_step: int + num_generations_per_prompt: int + max_rollout_turns: int # for multi-turn rollouts. Math Environments just have 1 turn (answering the question) + max_num_steps: int # maximum number of steps to train for + max_num_epochs: int # maximum number of epochs to train for + val_batch_size: int + val_period: int + val_at_start: bool + max_val_samples: int + topk_logits_k: int + seed: int + + +class DistillationSaveState(TypedDict): + total_steps: int # Track total number of steps across all epochs + current_epoch: int # Track current epoch + current_step: int # Track step within current epoch + val_reward: NotRequired[ + float + ] # Can be any metric. Set to 'accuracy' by default in validation. 
+ consumed_samples: int + total_valid_tokens: int # Track total number of non-padding tokens during training + + +def _default_distillation_save_state() -> DistillationSaveState: + return { + "current_epoch": 0, + "current_step": 0, + "total_steps": 0, + "val_reward": -99999999.0, # Aligned with GRPO + "consumed_samples": 0, + "total_valid_tokens": 0, + } + + +class MasterConfig(TypedDict): + """Main configuration structure.""" + + policy: PolicyConfig # Student model configuration + teacher: PolicyConfig # Teacher model configuration + loss_fn: DistillationLossConfig # Loss function configuration + env: dict[str, Any] # Environment configuration + data: DataConfig # Data configuration + distillation: DistillationConfig # Distillation configuration + logger: LoggerConfig # Logger configuration + cluster: ClusterConfig # Cluster configuration + checkpointing: CheckpointingConfig # Checkpointing configuration + + +# =============================================================================== +# Setup & Initialization +# =============================================================================== +def check_vocab_equality( + tokenizer: TokenizerType, student_model_name: str, teacher_model_name: str +) -> None: + """Check if the vocab of the tokenizer (student) and the teacher tokenizer are equal.""" + teacher_tokenizer = AutoTokenizer.from_pretrained(teacher_model_name) + + skip_hint = "Set NRL_SKIP_DISTILLATION_TOKENIZER_CHECK=true to skip this check." + + # 1) Exact token->id mapping equality + vocab_a = tokenizer.get_vocab() + vocab_b = teacher_tokenizer.get_vocab() + assert vocab_a == vocab_b, ( + f"Token->ID mapping differs between student and teacher. {skip_hint}" + ) + + # 2) Size consistency (sanity checks) + assert len(tokenizer) == len(teacher_tokenizer), ( + f"Effective vocab sizes differ between student and teacher. 
{skip_hint}" + ) + + # 3) Chech model.config.vocab_size to guarantee the last dimension of the logits is the same + student_config = AutoConfig.from_pretrained(student_model_name) + teacher_config = AutoConfig.from_pretrained(teacher_model_name) + assert student_config.vocab_size == teacher_config.vocab_size, ( + f"Model config vocab sizes differ between student and teacher. {skip_hint}" + ) + + +def setup( + master_config: MasterConfig, + tokenizer: TokenizerType, + train_dataset: AllTaskProcessedDataset, + val_dataset: Optional[AllTaskProcessedDataset], +) -> tuple[ + ColocatablePolicyInterface, # student_policy + ColocatablePolicyInterface, # teacher_policy + Optional[GenerationInterface], # student_generation + StatefulDataLoader, + Optional[StatefulDataLoader], + DistillationLossFn, + Logger, + CheckpointManager, + DistillationSaveState, + MasterConfig, +]: + """Main entry point for distillation algorithm. + + Returns: + tuple of student_policy, teacher_policy, student_generation, + train_dataloader, val_dataloader, + loss_fn, logger, checkpointer, distillation_save_state, master_config + """ + # Extract configuration + policy_config = master_config["policy"] + teacher_config = master_config["teacher"] + generation_config = master_config["policy"]["generation"] + loss_config = master_config["loss_fn"] + distillation_config = master_config["distillation"] + data_config = master_config["data"] + logger_config = master_config["logger"] + cluster_config = master_config["cluster"] + + assert generation_config is not None, ( + "A generation config in the PolicyConfig is required for distillation" + ) + + # Disallow SP + packing for dtensor path + for cfg, who in ((policy_config, "student"), (teacher_config, "teacher")): + # DTensor sequence parallel is supported; ensure CP and SP are not enabled together + # This incompatibility is enforced in DTensor workers during initialization. + # Additionally, SP may not be compatible with sequence packing for some models. 
+ # Refer to https://github.com/NVIDIA-NeMo/RL/issues/1178 for more details. + # Therefore, we disable SP + packing for distillation. + dtensor_enabled = cfg["dtensor_cfg"]["enabled"] + sequence_packing_enabled = ( + "sequence_packing" in cfg and cfg["sequence_packing"]["enabled"] + ) + sequence_parallel_enabled = ( + "sequence_parallel" in cfg["dtensor_cfg"] + and cfg["dtensor_cfg"]["sequence_parallel"] + ) + + if dtensor_enabled and sequence_packing_enabled and sequence_parallel_enabled: + raise AssertionError( + f"Distillation does not support DTensor sequence parallel + sequence packing ({who} policy). " + "Please refer to https://github.com/NVIDIA-NeMo/RL/issues/1178 for more details." + ) + + # Set random seed + set_seed(distillation_config["seed"]) + + # ========================== + # Logger + # ========================== + logger = Logger(logger_config) + logger.log_hyperparams(master_config) + + # ========================== + # Checkpointing + # ========================== + checkpointer = CheckpointManager(master_config["checkpointing"]) + last_checkpoint_path = checkpointer.get_latest_checkpoint_path() + distillation_save_state: Optional[DistillationSaveState] = cast( + Optional[DistillationSaveState], + checkpointer.load_training_info(last_checkpoint_path), + ) + if distillation_save_state is None: + distillation_save_state = _default_distillation_save_state() + + # ========================== + # Data + # ========================== + dataloader = StatefulDataLoader( + train_dataset, + batch_size=distillation_config["num_prompts_per_step"], + shuffle=data_config["shuffle"], + collate_fn=rl_collate_fn, + drop_last=True, + ) + + if last_checkpoint_path: + dataloader_state_dict = torch.load( + os.path.join(last_checkpoint_path, "train_dataloader.pt") + ) + dataloader.load_state_dict(dataloader_state_dict) + + print( + f" ✓ Training dataloader loaded with {len(train_dataset)} samples", flush=True + ) + + # Load validation dataset if provided + val_dataloader: 
Optional[StatefulDataLoader] = None + # If validation is enabled, load the validation dataloader + if distillation_config["val_period"] > 0 or distillation_config["val_at_start"]: + assert val_dataset is not None, ( + "Validation dataset is required if validation is enabled" + ) + val_dataloader = StatefulDataLoader( + val_dataset, + batch_size=distillation_config["val_batch_size"], + shuffle=False, + collate_fn=rl_collate_fn, + ) + print( + f" ✓ Validation dataloader loaded with {len(val_dataset)} samples", + flush=True, + ) + + # ========================== + # Cluster + # ========================== + print("\n▶ Setting up compute cluster...", flush=True) + colocated_inference = generation_config["colocated"]["enabled"] + + if colocated_inference: + cluster = RayVirtualCluster( + name="distillation_cluster", + bundle_ct_per_node_list=[cluster_config["gpus_per_node"]] + * cluster_config["num_nodes"], + use_gpus=True, + num_gpus_per_node=cluster_config["gpus_per_node"], + max_colocated_worker_groups=1 + if generation_config["backend"] == "megatron" + else 3, + ) + train_cluster = cluster + inference_cluster = cluster + print( + f" ✓ Ray cluster initialized with {cluster_config['num_nodes']} nodes", + flush=True, + ) + else: + assert generation_config["backend"] != "megatron", ( + "Non-colocated inference is not supported for Megatron generation backends. " + "Please use vLLM backend for generation." 
+ ) + + # train resources will be updated through overall and inference resources below + train_gpus_per_node = cluster_config["gpus_per_node"] + train_nodes = cluster_config["num_nodes"] + + inference_resources = generation_config["colocated"]["resources"] + inference_gpus_per_node = inference_resources["gpus_per_node"] + inference_nodes = inference_resources["num_nodes"] + + # validate and configure resources + if cluster_config["num_nodes"] == 1: + assert ( + inference_gpus_per_node is not None and inference_gpus_per_node > 0 + ), ( + "policy.generation.colocated.resources.gpus_per_node must be explicitly set to a value > 0 " + "when cluster.num_nodes = 1 and inference is non-colocated, " + f"but got {inference_gpus_per_node}." + ) + assert inference_nodes is None or inference_nodes == 1, ( + "policy.generation.colocated.resources.num_nodes must be 1 or set to null " + "when cluster.num_nodes = 1 and inference is non-colocated, " + f"but got {inference_nodes}." + ) + inference_nodes = 1 + train_gpus_per_node -= inference_gpus_per_node + else: + assert inference_nodes > 0, ( + "policy.generation.colocated.resources.num_nodes must be > 0 " + "when cluster.num_nodes > 1 and inference is non-colocated, " + f"but got {inference_nodes}." + ) + assert ( + inference_gpus_per_node is not None + and inference_gpus_per_node == cluster_config["gpus_per_node"] + ), ( + "policy.generation.colocated.resources.gpus_per_node must be explicitly set and equal to cluster.gpus_per_node " + "when cluster.num_nodes > 1 and inference is non-colocated, " + f"but got inference_gpus_per_node={inference_gpus_per_node}, cluster.gpus_per_node={cluster_config['gpus_per_node']}." 
+ ) + train_nodes -= inference_nodes + + # create clusters + train_cluster = RayVirtualCluster( + name="distillation_train_cluster", + bundle_ct_per_node_list=[train_gpus_per_node] * train_nodes, + use_gpus=True, + num_gpus_per_node=train_gpus_per_node, + max_colocated_worker_groups=3, + ) + inference_cluster = RayVirtualCluster( + name="distillation_inference_cluster", + bundle_ct_per_node_list=[inference_gpus_per_node] * inference_nodes, + use_gpus=True, + num_gpus_per_node=inference_gpus_per_node, + max_colocated_worker_groups=3, + ) + print( + f" ✓ Separate clusters created: train={train_nodes}x{train_gpus_per_node}GPUs, inference={inference_nodes}x{inference_gpus_per_node}GPUs", + flush=True, + ) + + # ========================== + # Teacher Policy + # ========================== + print("\n▶ Setting up teacher policy...", flush=True) + # Checkpoint paths + weights_path = None + optimizer_path = None + + if not bool(os.getenv("NRL_SKIP_DISTILLATION_TOKENIZER_CHECK", False)): + check_vocab_equality( + tokenizer, policy_config["model_name"], teacher_config["model_name"] + ) + + if "megatron_cfg" in teacher_config and teacher_config["megatron_cfg"]["enabled"]: + ## NOTE: this is equal to the total number of scheduler steps + total_train_iters = min( + distillation_config["max_num_steps"], + distillation_config["max_num_epochs"] * len(dataloader), + ) + teacher_config["megatron_cfg"]["train_iters"] = total_train_iters + + teacher_policy = Policy( + name_prefix="teacher", + cluster=train_cluster, + config=teacher_config, + tokenizer=tokenizer, + weights_path=weights_path, + optimizer_path=optimizer_path, + init_optimizer=False, + init_reference_model=False, + ) + teacher_policy.offload_after_refit() + + # ========================== + # Student Generation Interface + # ========================== + backend = generation_config["backend"] + generation_config["model_name"] = policy_config["model_name"] # Needed for vLLM + + if backend == "megatron": + student_generation = 
None + elif backend == "vllm": + generation_config = cast(VllmConfig, generation_config) + if "vllm_cfg" in generation_config: + ## make vllm hf overrides match the training policy + generation_config["vllm_cfg"]["hf_overrides"] = policy_config.get( + "hf_config_overrides", {} + ) + student_generation = VllmGeneration( + cluster=inference_cluster, config=generation_config + ) + student_generation.finish_generation() + print( + f" ✓ Using vLLM backend for generation with {policy_config['model_name']}", + flush=True, + ) + + # ========================== + # Student Policy + # ========================== + print("\n▶ Setting up student policy...", flush=True) + + # Checkpoint paths + if last_checkpoint_path: + weights_path = Path(last_checkpoint_path) / "policy" / "weights" + optimizer_path = Path(last_checkpoint_path) / "policy" / "optimizer" + else: + weights_path = None + optimizer_path = None + + if "megatron_cfg" in policy_config and policy_config["megatron_cfg"]["enabled"]: + ## NOTE: this is equal to the total number of scheduler steps + total_train_iters = min( + distillation_config["max_num_steps"], + distillation_config["max_num_epochs"] * len(dataloader), + ) + policy_config["megatron_cfg"]["train_iters"] = total_train_iters + + student_policy = Policy( + name_prefix="student", + cluster=train_cluster, + config=policy_config, + tokenizer=tokenizer, + weights_path=weights_path, + optimizer_path=optimizer_path, + init_optimizer=True, + init_reference_model=False, + ) + + if student_generation is not None: + state_dict_info = student_policy.prepare_refit_info() + student_generation.prepare_refit_info(state_dict_info) + + # if it is not colocated inference, initialize collective communication for update weights + if not colocated_inference: + ip, port = train_cluster.get_master_address_and_port() + print(f"Using ip: {ip}, port: {port} for collective communication", flush=True) + train_world_size = train_cluster.world_size() + # inference cluster + head node of 
the train cluster + world_size = train_world_size + inference_nodes * inference_gpus_per_node + # init collective + futures_train = student_policy.init_collective( + ip, port, world_size, train_world_size=train_world_size + ) + futures_inference = student_generation.init_collective( + ip, port, world_size, train_world_size=train_world_size + ) # type: ignore + # wait for all futures to complete + ray.get(futures_train + futures_inference) + + loss_fn = DistillationLossFn(loss_config) + + print("\n" + "=" * 60) + print(" " * 18 + "SETUP COMPLETE") + print("=" * 60 + "\n", flush=True) + + return ( + student_policy, + teacher_policy, + student_generation, + dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + distillation_save_state, + master_config, + ) + + +# =============================================================================== +# Training & Validation +# =============================================================================== + + +def distillation_train( + student_policy: ColocatablePolicyInterface, + teacher_policy: ColocatablePolicyInterface, + student_generation: Optional[GenerationInterface], + dataloader: StatefulDataLoader, + val_dataloader: Optional[StatefulDataLoader], + tokenizer: TokenizerType, + loss_fn: DistillationLossFn, + task_to_env: dict[str, EnvironmentInterface], + val_task_to_env: Optional[dict[str, EnvironmentInterface]], + logger: Logger, + checkpointer: CheckpointManager, + distillation_save_state: DistillationSaveState, + master_config: MasterConfig, +) -> None: + """Run Distillation training algorithm.""" + timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() + + NEED_REFIT = True + # If student_generation is None, use the student_policy as the generation interface (megatron framework backend) + if student_generation is None: + student_generation = student_policy # type: ignore + NEED_REFIT 
= False + POLICY_GENERATION_STALE = True # tracks if generation needs a refit before running + assert student_generation is not None # for mypy type check + + # common config/state items + current_epoch = distillation_save_state["current_epoch"] # current epoch + current_step = distillation_save_state[ + "current_step" + ] # current step within current epoch + total_steps = distillation_save_state[ + "total_steps" + ] # total number of steps across all epochs + consumed_samples = distillation_save_state["consumed_samples"] + total_valid_tokens = distillation_save_state["total_valid_tokens"] + val_period = master_config["distillation"]["val_period"] + val_at_start = master_config["distillation"]["val_at_start"] + colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] + max_epochs = master_config["distillation"][ + "max_num_epochs" + ] # max number of epochs to train for + max_steps = master_config["distillation"][ + "max_num_steps" + ] # max number of steps to train for + + # Run validation at the start if configured + if val_at_start and total_steps == 0: + print("\n🔍 Running initial validation...", flush=True) + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + student_policy, student_generation, colocated_inference + ) + POLICY_GENERATION_STALE = False + else: + student_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + student_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=total_steps, + master_config=master_config, + ) + student_generation.finish_generation() + logger.log_metrics(val_metrics, total_steps, prefix="validation") + logger.log_metrics(validation_timings, total_steps, prefix="timing/validation") + + # Run distillation training (multi-epoch until reaching max_num_steps or max_num_epochs) + batch: BatchedDataDict[DatumSpec] + + while total_steps < max_steps and current_epoch < max_epochs: + print( + f"\n{'=' * 25} Epoch {current_epoch + 
1}/{max_epochs} {'=' * 25}", + flush=True, + ) + + for batch in dataloader: + print( + f"\n{'=' * 25} Step {current_step + 1}/{min(len(dataloader), max_steps)} {'=' * 25}", + flush=True, + ) + maybe_gpu_profile_step(student_policy, total_steps + 1) + if student_policy != student_generation: + maybe_gpu_profile_step(student_generation, total_steps + 1) + val_metrics, validation_timings = None, None + + with timer.time("total_step_time"): + # Prepare batch + print("▶ Preparing batch...", flush=True) + with timer.time("data_processing"): + # Repeat batch items + repeated_batch: BatchedDataDict[DatumSpec] = ( + batch.repeat_interleave( + master_config["distillation"]["num_generations_per_prompt"] + ) + ) + + # Generate responses - this updates the LLMMessageLogType in repeated_batch + print( + f"▶ Generating responses for batch of size {repeated_batch.size}...", + flush=True, + ) + with timer.time("prepare_for_generation"): + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + student_policy, + student_generation, + colocated_inference, + timer=timer, + ) + POLICY_GENERATION_STALE = False + else: + student_generation.prepare_for_generation() + + with timer.time("generation"): + # Use async rollouts if vLLM async engine is enabled + if _should_use_async_rollouts(master_config): + ( + repeated_batch, + rollout_metrics, + ) = run_async_multi_turn_rollout( + policy_generation=student_generation, + input_batch=repeated_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=master_config["policy"][ + "max_total_sequence_length" + ], + max_rollout_turns=master_config["distillation"][ + "max_rollout_turns" + ], + greedy=False, + ) + else: + repeated_batch, rollout_metrics = run_multi_turn_rollout( + policy_generation=student_generation, + input_batch=repeated_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=master_config["policy"][ + "max_total_sequence_length" + ], + max_rollout_turns=master_config["distillation"][ + 
"max_rollout_turns" + ], + greedy=False, + ) + student_generation.finish_generation() + + with timer.time("data_processing"): + # Add loss mask and advantages to each message in LLMMessageLogType + for message_log in repeated_batch["message_log"]: + for message in message_log: + if message["role"] == "assistant": + message["token_loss_mask"] = torch.ones_like( + message["token_ids"] + ) + else: + message["token_loss_mask"] = torch.zeros_like( + message["token_ids"] + ) + + # Convert updated LLMMessageLogType to FlatMessagesType for training + flat_messages, input_lengths = batched_message_log_to_flat_message( + repeated_batch["message_log"], + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + make_sequence_length_divisible_by=master_config["policy"][ + "make_sequence_length_divisible_by" + ], + ) + + # Create training data from flattened messages + train_data = BatchedDataDict[DistillationLossDataDict]( + { + "input_ids": flat_messages["token_ids"], + "input_lengths": input_lengths, + "token_mask": flat_messages["token_loss_mask"], + "sample_mask": repeated_batch["loss_multiplier"], + } + ) + # this will be mini-batched inside the policy, so maintain the packed multimodal structure + train_data.update( + flat_messages.get_multimodal_dict(as_tensors=False) + ) + train_data.to("cpu") + + print("▶ Preparing for teacher logprob inference...", flush=True) + with timer.time("teacher_logprob_inference_prep"): + teacher_policy.prepare_for_lp_inference() + + print("▶ Computing teacher logprobs...", flush=True) + with timer.time("teacher_logprob_inference"): + teacher_topk = teacher_policy.get_topk_logits( + train_data, k=master_config["distillation"]["topk_logits_k"] + ) + train_data["teacher_topk_logits"] = teacher_topk["topk_logits"] + train_data["teacher_topk_indices"] = teacher_topk["topk_indices"] + + print("▶ Preparing for training...", flush=True) + with timer.time("training_prep"): + teacher_policy.offload_after_refit() + student_policy.prepare_for_training() 
# set model train and reload optim to GPU + POLICY_GENERATION_STALE = True + + print("▶ Training policy...", flush=True) + with timer.time("policy_training"): + train_results = student_policy.train(train_data, loss_fn) + + is_last_step = (total_steps + 1 >= max_steps) or ( + (current_epoch + 1 == max_epochs) + and (current_step + 1 == len(dataloader)) + ) + + # Run validation if it's a validation step + if val_period > 0 and (total_steps + 1) % val_period == 0: + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + student_policy, student_generation, colocated_inference + ) + POLICY_GENERATION_STALE = False + else: + student_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + student_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=total_steps + 1, + master_config=master_config, + ) + student_generation.finish_generation() + logger.log_metrics( + validation_timings, total_steps + 1, prefix="timing/validation" + ) + logger.log_metrics( + val_metrics, total_steps + 1, prefix="validation" + ) + + metrics = { + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + "mean_prompt_length": repeated_batch["length"].numpy(), + "total_num_tokens": input_lengths.numpy(), + } + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in { + "lr", + "wd", + "global_valid_seqs", + "global_valid_toks", + "mean_prompt_length", + }: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + metrics.update(rollout_metrics) + total_valid_tokens += metrics["global_valid_toks"] + + ## Checkpointing + consumed_samples += master_config["distillation"][ + "num_prompts_per_step" + ] + timeout.mark_iteration() + + should_save_by_step = ( + is_last_step + or (total_steps + 1) % master_config["checkpointing"]["save_period"] + == 0 + ) + # +1 because total_steps is 0-indexed + # Check if timeout-based checkpointing is enabled in config. 
+ should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): + student_policy.prepare_for_training() + + distillation_save_state["current_epoch"] = current_epoch + distillation_save_state["current_step"] = current_step + 1 + distillation_save_state["total_steps"] = total_steps + 1 + distillation_save_state["total_valid_tokens"] = total_valid_tokens + if val_metrics is not None: + distillation_save_state["val_reward"] = val_metrics["accuracy"] + elif "val_reward" in distillation_save_state: + del distillation_save_state["val_reward"] + distillation_save_state["consumed_samples"] = consumed_samples + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" or "train" metrics dictionary.' + f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_reward --> 'val:accuracy'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: + warnings.warn( + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. 
" + "This checkpoint will not be saved as top-k.", + stacklevel=2, + ) + if full_metric_name in distillation_save_state: + del distillation_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + distillation_save_state[full_metric_name] = metrics_source[ + metric_name + ] + + with timer.time("checkpointing"): + print( + f"Saving checkpoint for step {total_steps + 1}...", + flush=True, + ) + checkpoint_path = checkpointer.init_tmp_checkpoint( + total_steps + 1, distillation_save_state, master_config + ) + student_policy.save_checkpoint( + weights_path=os.path.join( + checkpoint_path, "policy", "weights" + ), + optimizer_path=os.path.join( + checkpoint_path, "policy", "optimizer" + ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), + checkpointing_cfg=master_config["checkpointing"], + ) + torch.save( + dataloader.state_dict(), + os.path.join(checkpoint_path, "train_dataloader.pt"), + ) + checkpointer.finalize_checkpoint(checkpoint_path) + + # Logging + # Log training data + log_data = {"content": flat_messages["content"]} + log_data["input_lengths"] = input_lengths.tolist() + logger.log_batched_dict_as_jsonl( + log_data, f"train_data_step{total_steps + 1}.jsonl" + ) + + timing_metrics: dict[str, float] = timer.get_timing_metrics( + reduction_op="sum" + ) # type: ignore + + print("\n📊 Training Results:") + + print(f" • Loss: {metrics['loss']:.4f}") + print( + f" • Mean Generation Length: {rollout_metrics['mean_gen_tokens_per_sample']:.4f}" + ) + if "total_flops" in train_results: + total_tflops = ( + train_results["total_flops"] + / timing_metrics["policy_training"] + / 1e12 + ) + num_ranks = train_results["num_ranks"] + print( + f" • Training FLOPS: {total_tflops:.2f} TFLOPS ({total_tflops / num_ranks:.2f} TFLOPS per rank)", + flush=True, + ) + if "theoretical_tflops" in train_results: + theoretical_tflops = 
train_results["theoretical_tflops"] + print( + f" • Training Model Floating Point Utilization: {100 * total_tflops / theoretical_tflops:.2f}%", + flush=True, + ) + metrics["train_fp_utilization"] = total_tflops / theoretical_tflops + + print("\n⏱️ Timing:", flush=True) + # Display total time first, separately + total_time = timing_metrics.get("total_step_time", 0) + + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + metrics.update( + { + "tokens_per_sec_per_gpu": metrics["total_num_tokens"] + / total_time + / total_num_gpus + } + ) + + print(f" • Total step time: {total_time:.2f}s", flush=True) + + # Display all other timing metrics + for k, v in sorted( + timing_metrics.items(), key=lambda item: item[1], reverse=True + ): + if k != "total_step_time": + percent = (v / total_time * 100) if total_time > 0 else 0 + print(f" • {k}: {v:.2f}s ({percent:.1f}%)", flush=True) + + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + metrics["global_valid_toks"] / total_time / total_num_gpus + ) + logger.log_metrics(metrics, total_steps + 1, prefix="train") + logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") + + timer.reset() + current_step += 1 + total_steps += 1 + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return + if total_steps >= max_steps: + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) + return + + # End of epoch + current_epoch += 1 + current_step = 0 # Reset step counter for new epoch + + +def validate( + policy_generation: GenerationInterface, + val_dataloader: Optional[StatefulDataLoader], + tokenizer, + val_task_to_env: Optional[dict[str, EnvironmentInterface]], + step: int, + master_config: MasterConfig, +) -> tuple[dict[str, Any], dict[str, Any]]: + """Run validation on the validation dataset.""" + if val_dataloader is None: + print(" ⚠️ No validation dataloader provided, 
skipping validation", flush=True) + return {}, {} + + if val_task_to_env is None: + print( + " ⚠️ No validation task to environment mapping provided, skipping validation", + flush=True, + ) + return {}, {} + + timer = Timer() + with timer.time("total_validation_time"): + print(f"▶ Starting validation at step {step}...", flush=True) + + total_rewards = [] # Can be any metric. Setted to 'accuracy' by default. + total_lengths = [] + all_message_logs = [] # Collect all message logs + + max_batches = ( + master_config["distillation"]["max_val_samples"] + // master_config["distillation"]["val_batch_size"] + ) + for batch_idx, val_batch in enumerate(val_dataloader): + if batch_idx >= max_batches: + break + + # Generate responses (updates the LLMMessageLogType in batch_with_msg_logs) + # Use async rollouts if vLLM async engine is enabled + if _should_use_async_rollouts(master_config): + val_batch, gen_metrics = run_async_multi_turn_rollout( + policy_generation, + val_batch, + tokenizer, + val_task_to_env, + max_seq_len=master_config["policy"]["max_total_sequence_length"], + max_rollout_turns=master_config["distillation"][ + "max_rollout_turns" + ], + greedy=False, + ) + else: + val_batch, gen_metrics = run_multi_turn_rollout( + policy_generation, + val_batch, + tokenizer, + val_task_to_env, + max_seq_len=master_config["policy"]["max_total_sequence_length"], + max_rollout_turns=master_config["distillation"][ + "max_rollout_turns" + ], + greedy=False, + ) + rewards = val_batch["total_reward"] + + total_rewards.extend(rewards.tolist()) + total_lengths.append(gen_metrics["mean_gen_tokens_per_sample"]) + + # Collect message logs for later display + to_env = [ + get_keys_from_message_log( + val_batch["message_log"][i], ["role", "content"] + ) + for i in range(len(val_batch["message_log"])) + ] + + all_message_logs.extend(to_env) + + # Calculate validation metrics + accuracy = ( + sum(total_rewards) / len(total_rewards) if len(total_rewards) > 0 else 0 + ) + avg_length = ( + 
sum(total_lengths) / len(total_lengths) if len(total_lengths) > 0 else 0 + ) + + val_metrics = { + "accuracy": accuracy, + "avg_length": avg_length, + } + + # Print sample conversations only once at the end of validation + try: + print_message_log_samples( + all_message_logs, + total_rewards, + num_samples=min( + master_config["logger"]["num_val_samples_to_print"], + len(all_message_logs), + ), + step=step, + ) + except Exception as e: + print(f"\n ⚠️ Error displaying message samples: {str(e)}") + print(" ⚠️ Continuing validation without displaying samples...", flush=True) + + # Get timing metrics + timing_metrics = timer.get_timing_metrics(reduction_op="sum") + validation_time = timing_metrics.get("total_validation_time", 0) + + # Print summary of validation results + print("\n📊 Validation Results:") + print(f" • Accuracy: {accuracy:.4f}") + print(f" • Average response length: {avg_length:.1f} tokens") + print(f" • Samples processed: {len(total_rewards)}", flush=True) + + # Print timing information + print("\n ⏱️ Validation Timing:") + validation_time = timing_metrics.get("total_validation_time", 0) + print(f" • Total validation time: {validation_time:.2f}s", flush=True) + + # Make sure to reset the timer after validation + timer.reset() + + return val_metrics, timing_metrics diff --git a/nemo_rl/algorithms/dpo.py b/nemo_rl/algorithms/dpo.py index 30ba78f6f2..59b3b374f3 100644 --- a/nemo_rl/algorithms/dpo.py +++ b/nemo_rl/algorithms/dpo.py @@ -16,7 +16,7 @@ from collections import defaultdict from functools import partial from pathlib import Path -from typing import NotRequired, Optional, TypedDict, cast +from typing import Optional, TypedDict, cast import numpy as np import torch @@ -26,9 +26,10 @@ from nemo_rl.algorithms.loss_functions import ( DPOLossFn, ) -from nemo_rl.algorithms.utils import set_seed +from nemo_rl.algorithms.utils import maybe_pad_last_batch, set_seed from nemo_rl.data import DataConfig -from nemo_rl.data.datasets import 
AllTaskProcessedDataset, dpo_collate_fn +from nemo_rl.data.collate_fn import preference_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import PolicyInterface @@ -36,15 +37,15 @@ from nemo_rl.utils.checkpoint import CheckpointingConfig, CheckpointManager from nemo_rl.utils.logger import Logger, LoggerConfig from nemo_rl.utils.nsys import maybe_gpu_profile_step -from nemo_rl.utils.timer import Timer +from nemo_rl.utils.timer import TimeoutChecker, Timer class DPOSaveState(TypedDict): epoch: int # Track current epoch step: int # Track step within current epoch total_steps: int # Track total number of steps across all epochs - val_loss: NotRequired[float] # Optional field - may not be present during training consumed_samples: int + total_valid_tokens: int # Track total number of non-padding tokens during training def _default_dpo_save_state() -> DPOSaveState: @@ -53,6 +54,7 @@ def _default_dpo_save_state() -> DPOSaveState: "step": 0, "total_steps": 0, "consumed_samples": 0, + "total_valid_tokens": 0, } @@ -86,6 +88,18 @@ class MasterConfig(TypedDict): checkpointing: CheckpointingConfig +class DPOValMetrics(TypedDict): + loss: float + sft_loss: float + preference_loss: float + accuracy: float + rewards_chosen_mean: float + rewards_rejected_mean: float + num_valid_samples: float + global_valid_seqs: float + global_valid_toks: float + + # ======================================================= # Setup & Initialization # ======================================================= @@ -93,12 +107,12 @@ def setup( master_config: MasterConfig, tokenizer: AutoTokenizer, train_dataset: AllTaskProcessedDataset, - val_dataset: AllTaskProcessedDataset, + val_dataset: dict[str, AllTaskProcessedDataset], ) -> tuple[ Policy, RayVirtualCluster, StatefulDataLoader, - StatefulDataLoader, + dict[str, 
StatefulDataLoader], DPOLossFn, Logger, CheckpointManager, @@ -152,15 +166,17 @@ def setup( train_dataloader = StatefulDataLoader( train_dataset, batch_size=policy_config["train_global_batch_size"], - shuffle=True, + shuffle=data_config["shuffle"], collate_fn=partial( - dpo_collate_fn, + preference_collate_fn, tokenizer=tokenizer, make_sequence_length_divisible_by=policy_config[ "make_sequence_length_divisible_by" ], + add_loss_mask=True, ), drop_last=True, + num_workers=data_config["num_workers"], ) if last_checkpoint_path is not None: @@ -169,19 +185,24 @@ def setup( ) train_dataloader.load_state_dict(dataloader_state_dict) - val_dataloader = StatefulDataLoader( - val_dataset, - batch_size=dpo_config["val_global_batch_size"], - shuffle=False, - collate_fn=partial( - dpo_collate_fn, - tokenizer=tokenizer, - make_sequence_length_divisible_by=policy_config[ - "make_sequence_length_divisible_by" - ], - ), - drop_last=True, - ) + val_dataloader = { + k: StatefulDataLoader( + v, + batch_size=dpo_config["val_global_batch_size"], + shuffle=False, + collate_fn=partial( + preference_collate_fn, + tokenizer=tokenizer, + make_sequence_length_divisible_by=policy_config[ + "make_sequence_length_divisible_by" + ], + add_loss_mask=True, + ), + drop_last=False, + num_workers=data_config["num_workers"], + ) + for k, v in val_dataset.items() + } # ========================== # Cluster @@ -201,6 +222,19 @@ def setup( # Training # ========================== print("\n▶ Setting up model...") + if policy_config.get("megatron_cfg", {}).get("enabled", False): + total_train_iters = min( + dpo_config["max_num_steps"], + dpo_config["max_num_epochs"] * len(train_dataloader), + ) + ## NOTE: we double the train_iters because effective batch size is doubled + ## for (chosen, rejected) pairs + policy_config["megatron_cfg"]["train_iters"] = total_train_iters * 2 + if "scheduler" in policy_config["megatron_cfg"]: + for k in policy_config["megatron_cfg"]["scheduler"]: + if "iters" in k: + 
policy_config["megatron_cfg"]["scheduler"][k] *= 2 + policy = Policy( cluster=cluster, config=policy_config, @@ -214,6 +248,9 @@ def setup( init_optimizer=True, init_reference_model=True, ) + # print the node IP and GPU ID of the policy workers for debugging + policy.print_node_ip_and_gpu_id() + loss_fn = DPOLossFn(master_config["dpo"]) print(" ✓ Model initialized") @@ -246,6 +283,15 @@ def add_ref_logprobs_to_data(dataloader, policy, master_config, is_val=False): else master_config["policy"]["train_micro_batch_size"] * 2 ) + # when running validation with drop_last=False, we might end up with a partial batch. + # In this case, we pad the batch to the next multiple of micro_batch_size * dp_size. + dp_size = policy.sharding_annotations.get_axis_size("data_parallel") + if batch.size % (dp_size * micro_batch_size) != 0: + assert is_val, ( + "Partial batches should only happen during validation, but got a partial batch during training." + ) + batch = maybe_pad_last_batch(batch, dp_size, micro_batch_size) + ## append ref policy logprobs to batch logprobs = policy.get_reference_policy_logprobs( batch, @@ -266,7 +312,7 @@ def add_ref_logprobs_to_data(dataloader, policy, master_config, is_val=False): # ======================================================= def validate( policy: PolicyInterface, - val_dataloader: StatefulDataLoader, + val_dataloader: dict[str, StatefulDataLoader], tokenizer, loss_fn, step: int, @@ -274,18 +320,69 @@ def validate( val_batches: int, val_batch_size: int, val_mbs: int, + logger: Logger, ): - """Run validation on the validation dataset.""" + val_metrics, validation_timings = {}, {} + for val_dataset_name, v in val_dataloader.items(): + k_val_metrics, k_validation_timings = validate_one_dataset( + policy=policy, + val_dataloader=v, + loss_fn=loss_fn, + step=step, + master_config=master_config, + val_batches=val_batches, + val_batch_size=val_batch_size, + val_mbs=val_mbs, + dataset_name=val_dataset_name, + ) + prefix = 
f"validation-{val_dataset_name}" + + logger.log_metrics(k_val_metrics, step, prefix=prefix) + logger.log_metrics(k_validation_timings, step, prefix=f"timing/{prefix}") + + for metric_name in DPOValMetrics.__annotations__.keys(): + val_metrics[f"{prefix}_{metric_name}"] = k_val_metrics[metric_name] + validation_timings[prefix + "_total_validation_time"] = k_validation_timings[ + "total_validation_time" + ] + + if len(validation_timings) > 0: + total_validation_time = sum(validation_timings.values()) + logger.log_metrics( + {"total_validation_time": total_validation_time}, + step, + prefix="timing/validation", + ) + validation_timings["total_validation_time"] = total_validation_time + + return val_metrics, validation_timings + + +def validate_one_dataset( + policy: PolicyInterface, + val_dataloader: StatefulDataLoader, + loss_fn, + step: int, + master_config: MasterConfig, + val_batches: int, + val_batch_size: int, + val_mbs: int, + dataset_name: str, +): + """Run validation on one validation dataset.""" if val_dataloader is None: + assert val_dataloader is not None or master_config["dpo"]["val_period"] == 0, ( + "val_dataloader is None, so dpo.val_period must be 0" + ) print(" ⚠️ No validation dataloader provided, skipping validation") return timer = Timer() with timer.time("total_validation_time"): - print(f"▶ Starting validation at step {step}...") + print(f"▶ Starting validation at step {step} for `{dataset_name}` set..") - val_metrics = defaultdict(lambda: 0.0) + val_metrics = defaultdict(list) num_valid_batches = 0 for batch_idx, val_batch in enumerate( add_ref_logprobs_to_data(val_dataloader, policy, master_config, is_val=True) @@ -295,7 +392,7 @@ def validate( val_batch, loss_fn, eval_mode=True, - gbs=val_batch_size * 2, + gbs=val_batch.size, mbs=val_mbs * 2, ) @@ -304,22 +401,61 @@ def validate( "No validation metrics were collected for this batch." " This is likely because there were no valid samples." 
) - else: - for k, v in val_results["all_mb_metrics"].items(): - if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: - val_metrics[k] += np.mean(v).item() - else: - val_metrics[k] += np.sum(v).item() + for metric_name in DPOValMetrics.__annotations__.keys(): + reduction = ( + np.mean + if metric_name in {"global_valid_seqs", "global_valid_toks"} + else sum + ) + val_metrics[metric_name] += [ + reduction(val_results["all_mb_metrics"][metric_name]) + ] + num_valid_batches += 1 if val_batches > 0 and batch_idx >= val_batches - 1: break - for k, v in val_metrics.items(): - if k == "num_valid_samples": - continue - val_metrics[k] /= num_valid_batches + if num_valid_batches > 0: + sum_num_valid_samples = sum(val_metrics["num_valid_samples"]) + global_valid_toks = sum(val_metrics["global_valid_toks"]) + global_valid_seqs = sum(val_metrics["global_valid_seqs"]) + val_metrics = DPOValMetrics( + num_valid_samples=sum_num_valid_samples, + global_valid_seqs=global_valid_seqs, + global_valid_toks=global_valid_toks, + **{ + metric_name: sum( + [ + value * weight + for value, weight in zip( + val_metrics[metric_name], + val_metrics["num_valid_samples"], + ) + ] + ) + / sum_num_valid_samples + for metric_name in DPOValMetrics.__annotations__.keys() + if metric_name + not in { + "num_valid_samples", + "global_valid_seqs", + "global_valid_toks", + } + }, + ) + else: + warnings.warn( + "No validation metrics were collected." + " This is likely because there were no valid samples in the validation set." 
+ ) + val_metrics = DPOValMetrics( + **{ + metric_name: 0.0 + for metric_name in DPOValMetrics.__annotations__.keys() + } + ) # Calculate validation metrics policy.prepare_for_training() @@ -336,12 +472,12 @@ def validate( else: # Print summary of validation results - print("\n📊 Validation Results:") - print(f" • Validation loss: {float(val_metrics['loss']):.4f}") - print(f" • Validation accuracy: {float(val_metrics['accuracy']):.4f}") + print(f"\n📊 Validation Results for `{dataset_name}` set:") + for metric_name in DPOValMetrics.__annotations__.keys(): + print(f" • Validation {metric_name}: {val_metrics[metric_name]:.4f}") # Print timing information - print("\n ⏱️ Validation Timing:") + print(f"\n ⏱️ Validation Timing for `{dataset_name}` set:") validation_time = timing_metrics.get("total_validation_time", 0) print(f" • Total validation time: {validation_time:.2f}s") @@ -364,16 +500,25 @@ def dpo_train( ) -> None: # Run dpo training timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() if dpo_save_state is None: dpo_save_state = _default_dpo_save_state() current_epoch = 0 current_step = 0 total_steps = 0 + total_valid_tokens = 0 else: current_epoch = dpo_save_state["epoch"] current_step = dpo_save_state["step"] total_steps = dpo_save_state["total_steps"] + total_valid_tokens = dpo_save_state.get( + "total_valid_tokens", 0 + ) # Default to 0 for backward compatibility with older checkpoints dpo_config = master_config["dpo"] # Validation configuration @@ -394,15 +539,13 @@ def dpo_train( val_batches=dpo_config["val_batches"], val_batch_size=dpo_config["val_global_batch_size"], val_mbs=dpo_config["val_micro_batch_size"], + logger=logger, ) if validation_result is not None: val_metrics, validation_timings = validation_result else: val_metrics, validation_timings = None, None - logger.log_metrics(val_metrics, total_steps, prefix="validation") - 
logger.log_metrics(validation_timings, total_steps, prefix="timing/validation") - policy.prepare_for_training() while ( @@ -420,15 +563,16 @@ def dpo_train( with timer.time("total_step_time"): print("▶ Taking a training step...") - train_results = policy.train( - batch, - loss_fn, - eval_mode=False, - ## NOTE: we double the batch size here because each preference example corresponds to a pair of - ## examples, chosen and rejected, and the pair needs to be processed as part of the same microbatch. - gbs=master_config["policy"]["train_global_batch_size"] * 2, - mbs=master_config["policy"]["train_micro_batch_size"] * 2, - ) + with timer.time("policy_training"): + train_results = policy.train( + batch, + loss_fn, + eval_mode=False, + ## NOTE: we double the batch size here because each preference example corresponds to a pair of + ## examples, chosen and rejected, and the pair needs to be processed as part of the same microbatch. + gbs=master_config["policy"]["train_global_batch_size"] * 2, + mbs=master_config["policy"]["train_micro_batch_size"] * 2, + ) is_last_step = total_steps + 1 >= master_config["dpo"][ "max_num_steps" @@ -449,45 +593,91 @@ def dpo_train( val_batches=dpo_config["val_batches"], val_batch_size=dpo_config["val_global_batch_size"], val_mbs=dpo_config["val_micro_batch_size"], + logger=logger, ) if validation_result is not None: val_metrics, validation_timings = validation_result else: val_metrics, validation_timings = None, None - logger.log_metrics( - validation_timings, total_steps + 1, prefix="timing/validation" - ) - logger.log_metrics( - val_metrics, total_steps + 1, prefix="validation" - ) + metrics = { + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + } + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + total_valid_tokens += 
metrics["global_valid_toks"] ## Checkpointing dpo_save_state["consumed_samples"] += master_config["policy"][ "train_global_batch_size" ] - if master_config["checkpointing"]["enabled"] and ( + timeout.mark_iteration() + + should_save_by_step = ( is_last_step or (total_steps + 1) % master_config["checkpointing"]["save_period"] == 0 - ): # +1 because step is 0-indexed + ) + # +1 because step is 0-indexed + # Check if timeout-based checkpointing is enabled in config. + should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): dpo_save_state["step"] = (current_step + 1) % len(train_dataloader) dpo_save_state["total_steps"] = total_steps + 1 dpo_save_state["epoch"] = current_epoch - if val_metrics is not None: - dpo_save_state["val_loss"] = val_metrics["loss"] - elif "val_loss" in dpo_save_state: - del dpo_save_state["val_loss"] - - if master_config["checkpointing"]["metric_name"] is not None: + dpo_save_state["total_valid_tokens"] = total_valid_tokens + # Remove outdated validation metrics + for key in list(dpo_save_state): if ( - master_config["checkpointing"]["metric_name"] - not in dpo_save_state + key.startswith("val") + and any( + [ + key.endswith(f"_{metric_name}") + for metric_name in DPOValMetrics.__annotations__.keys() + if metric_name != "num_valid_samples" + ] + ) + and (val_metrics is None or key not in val_metrics) ): + del dpo_save_state[key] + if val_metrics is not None: + dpo_save_state.update(val_metrics) + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" or "train" metrics dictionary.' 
+ f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_loss --> 'val:validation-default_loss'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: warnings.warn( - f"You asked to save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " - "Saving most recent k checkpoints instead." + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. " + "This checkpoint will not be saved as top-k.", + stacklevel=2, + ) + if full_metric_name in dpo_save_state: + del dpo_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" ) - master_config["checkpointing"]["metric_name"] = None + else: + dpo_save_state[full_metric_name] = metrics_source[ + metric_name + ] with timer.time("checkpointing"): print(f"Saving checkpoint for step {total_steps + 1}...") @@ -504,6 +694,7 @@ def dpo_train( tokenizer_path=os.path.join( checkpoint_path, "policy", "tokenizer" ), + checkpointing_cfg=master_config["checkpointing"], ) torch.save( train_dataloader.state_dict(), @@ -511,21 +702,27 @@ def dpo_train( ) checkpointer.finalize_checkpoint(checkpoint_path) - losses = train_results["loss"] - metrics = { - "loss": train_results["loss"].numpy(), - "grad_norm": train_results["grad_norm"].numpy(), - } - metrics.update(train_results["all_mb_metrics"]) - for k, v in metrics.items(): - if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: - metrics[k] = np.mean(v).item() - else: - metrics[k] = np.sum(v).item() timing_metrics = timer.get_timing_metrics(reduction_op="sum") print("\n📊 Training Results:") - print(f" • Loss: {float(metrics['loss']):.4f}") + for metric_name in DPOValMetrics.__annotations__.keys(): + print(f" • {metric_name}: 
{float(metrics[metric_name]):.4f}") + if "total_flops" in train_results: + total_tflops = ( + train_results["total_flops"] + / timing_metrics["policy_training"] + / 1e12 + ) + num_ranks = train_results["num_ranks"] + print( + f" • Training FLOPS: {total_tflops:.2f} TFLOPS ({total_tflops / num_ranks:.2f} TFLOPS per rank)" + ) + if "theoretical_tflops" in train_results: + theoretical_tflops = train_results["theoretical_tflops"] + print( + f" • Training Model Floating Point Utilization: {100 * total_tflops / theoretical_tflops:.2f}%" + ) + metrics["train_fp_utilization"] = total_tflops / theoretical_tflops print("\n⏱️ Timing:") # Display total time first, separately total_time = timing_metrics.get("total_step_time", 0) @@ -539,6 +736,13 @@ def dpo_train( percent = (v / total_time * 100) if total_time > 0 else 0 print(f" • {k}: {v:.2f}s ({percent:.1f}%)") + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + metrics["global_valid_toks"] / total_time / total_num_gpus + ) logger.log_metrics(metrics, total_steps + 1, prefix="train") logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") @@ -546,7 +750,14 @@ def dpo_train( current_step += 1 total_steps += 1 + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return if total_steps >= master_config["dpo"]["max_num_steps"]: + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) return current_epoch += 1 diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index 96faec96cf..c67b532498 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -11,8 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import gc import os +import time import warnings +from concurrent.futures import ThreadPoolExecutor +from contextlib import nullcontext from pathlib import Path from typing import Any, NotRequired, Optional, TypedDict, TypeVar, cast @@ -20,6 +24,7 @@ import ray import torch from torchdata.stateful_dataloader import StatefulDataLoader +from transformers import AutoProcessor from transformers.tokenization_utils_base import PreTrainedTokenizerBase from nemo_rl.algorithms.interfaces import LossFunction @@ -28,31 +33,33 @@ ClippedPGLossDataDict, ClippedPGLossFn, ) -from nemo_rl.algorithms.utils import calculate_baseline_and_std_per_prompt -from nemo_rl.data import DataConfig -from nemo_rl.data.datasets import AllTaskProcessedDataset, rl_collate_fn -from nemo_rl.data.interfaces import ( - DatumSpec, +from nemo_rl.algorithms.reward_functions import ( + RewardShapingConfig, + apply_reward_shaping, +) +from nemo_rl.algorithms.utils import ( + calculate_baseline_and_std_per_prompt, + print_performance_metrics, + set_seed, ) +from nemo_rl.data import DataConfig +from nemo_rl.data.collate_fn import rl_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import DatumSpec from nemo_rl.data.llm_message_utils import ( batched_message_log_to_flat_message, get_keys_from_message_log, ) from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.virtual_cluster import ( - ClusterConfig, - RayVirtualCluster, -) -from nemo_rl.environments.interfaces import ( - EnvironmentInterface, -) +from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env +from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster +from nemo_rl.environments.interfaces import EnvironmentInterface from nemo_rl.experience.rollouts import ( run_async_multi_turn_rollout, + run_async_penguin_rollout, run_multi_turn_rollout, ) -from nemo_rl.models.generation.interfaces import ( - 
GenerationInterface, -) +from nemo_rl.models.generation.interfaces import GenerationInterface from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import ColocatablePolicyInterface @@ -64,7 +71,8 @@ print_message_log_samples, ) from nemo_rl.utils.nsys import maybe_gpu_profile_step -from nemo_rl.utils.timer import Timer +from nemo_rl.utils.timer import TimeoutChecker, Timer +from nemo_rl.utils.venvs import create_local_venv_on_each_node # =============================================================================== # Configuration @@ -72,9 +80,41 @@ TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) +class RewardScalingConfig(TypedDict): + """Configure linear reward scaling with clamping. + + When `enabled` is True, each reward is clamped to the source interval + [source_min, source_max] and linearly mapped to the target interval + [target_min, target_max]. Refer to the scale_rewards function for the implementation. + + Defaults: + source_min=0.0, source_max=1.0, target_min=0.0, target_max=1.0 + """ + + enabled: bool + source_min: NotRequired[float] + source_max: NotRequired[float] + target_min: NotRequired[float] + target_max: NotRequired[float] + + +class AsyncGRPOConfig(TypedDict): + enabled: bool + # Maximum trajectory age in training steps for samples drawn from the + # async replay buffer. Trajectories older than this are excluded during + # sampling; buffer sizing also scales with this value. + max_trajectory_age_steps: int + # Does the weight synchronization as soon as the training is done + # without waiting for the pending generations to finish. + in_flight_weight_updates: NotRequired[bool] + # Recomputes the KV cache after the in-flight weight updates. 
+ recompute_kv_cache_after_weight_updates: NotRequired[bool] + + class GRPOConfig(TypedDict): num_prompts_per_step: int num_generations_per_prompt: int + max_num_epochs: int max_num_steps: int max_rollout_turns: int normalize_rewards: bool @@ -83,21 +123,41 @@ class GRPOConfig(TypedDict): val_batch_size: int val_at_start: bool max_val_samples: int + seed: int + async_grpo: NotRequired[AsyncGRPOConfig] + overlong_filtering: NotRequired[bool] + # whether to enable dynamic sampling, i.e. + # whether to discard prompts whose rewards have zero standard deviation + use_dynamic_sampling: bool + # When using dynamic sampling, the maximum number of batches to generate + # before throwing an error + dynamic_sampling_max_gen_batches: NotRequired[int] + # When using dynamic sampling, generation prompt batch size will equal + # num_prompts_per_step * batch_multiplier + batch_multiplier: NotRequired[float] + reward_shaping: RewardShapingConfig + reward_scaling: RewardScalingConfig class GRPOSaveState(TypedDict): - step: int + consumed_samples: int + current_step: int + current_epoch: int + total_steps: int + total_valid_tokens: int # Track total number of non-padding tokens during training val_reward: NotRequired[ float ] # Optional field - may not be present during training - consumed_samples: int def _default_grpo_save_state() -> GRPOSaveState: return { - "step": 0, - "val_reward": -99999999.0, "consumed_samples": 0, + "current_step": 0, + "current_epoch": 0, + "total_steps": 0, + "total_valid_tokens": 0, + "val_reward": -99999999.0, } @@ -126,6 +186,7 @@ def setup( tokenizer: TokenizerType, dataset: AllTaskProcessedDataset, val_dataset: Optional[AllTaskProcessedDataset], + processor: Optional[AutoProcessor] = None, ) -> tuple[ ColocatablePolicyInterface, Optional[GenerationInterface], @@ -143,11 +204,16 @@ def setup( Returns: tuple of policy, cluster, dataloader, tokenizer, loss_fn, math_env, logger, master_config, val_dataloader """ + # Start timing the entire setup process 
+ setup_start_time = time.perf_counter() + # Extract individual configs for easier access policy_config = master_config["policy"] generation_config = master_config["policy"]["generation"] + env_configs = master_config["env"] loss_config = master_config["loss_fn"] grpo_config = master_config["grpo"] + data_config = master_config["data"] logger_config = master_config["logger"] cluster_config = master_config["cluster"] @@ -155,6 +221,9 @@ def setup( "A generation config in the PolicyConfig is required for GRPO" ) + # Set seed for all random number generators + set_seed(grpo_config["seed"]) + # ========================== # Logger # ========================== @@ -175,12 +244,23 @@ def setup( # ========================== # Data # ========================== + # Validate batch_multiplier + batch_multiplier = grpo_config["batch_multiplier"] + dataloader_batch_size = grpo_config["num_prompts_per_step"] + if not grpo_config["use_dynamic_sampling"]: + assert batch_multiplier == 1, ( + "batch_multiplier>1 can only be used if use_dynamic_sampling=True" + ) + else: + dataloader_batch_size = int(dataloader_batch_size * batch_multiplier) + dataloader = StatefulDataLoader( dataset, - batch_size=grpo_config["num_prompts_per_step"], - shuffle=False, + batch_size=dataloader_batch_size, + shuffle=data_config["shuffle"], collate_fn=rl_collate_fn, drop_last=True, + num_workers=data_config["num_workers"], ) if last_checkpoint_path is not None: dataloader_state_dict = torch.load( @@ -188,7 +268,7 @@ def setup( ) dataloader.load_state_dict(dataloader_state_dict) - print(f" ✓ Training dataloader loaded with {len(dataset)} samples") + print(f" ✓ Training dataloader loaded with {len(dataset)} samples", flush=True) # Load validation dataset if provided val_dataloader: Optional[StatefulDataLoader] = None @@ -202,29 +282,66 @@ def setup( batch_size=grpo_config["val_batch_size"], shuffle=False, collate_fn=rl_collate_fn, + num_workers=data_config["num_workers"], + ) + print( + f" ✓ Validation 
dataloader loaded with {len(val_dataset)} samples", + flush=True, ) - print(f" ✓ Validation dataloader loaded with {len(val_dataset)} samples") # ========================== # Cluster # ========================== - print("\n▶ Setting up compute cluster...") + print("\n▶ Setting up compute cluster...", flush=True) colocated_inference = generation_config["colocated"]["enabled"] + reward_model_enabled = ( + "reward_model" in env_configs and env_configs["reward_model"]["enabled"] + ) + + total_nodes = cluster_config["num_nodes"] + if reward_model_enabled: + rm_resource = env_configs["reward_model"]["resources"] + rm_nodes = rm_resource["num_nodes"] + rm_gpus_per_node = rm_resource["gpus_per_node"] + else: + rm_nodes = 0 + rm_gpus_per_node = 0 + + if total_nodes == 1: + policy_nodes = total_nodes + else: + policy_nodes = total_nodes - rm_nodes + assert policy_nodes > 0, ( + "policy_nodes must be > 0, but got " + f"policy_nodes:{policy_nodes} + rm_nodes:{rm_nodes} = total_nodes:{total_nodes}" + ) if colocated_inference: + if total_nodes == 1: + policy_gpus_per_node = cluster_config["gpus_per_node"] - rm_gpus_per_node + assert policy_gpus_per_node > 0, ( + "policy.generation.colocated.resources.gpus_per_node must be > 0 " + "when cluster.num_nodes = 1, " + f"but got {policy_gpus_per_node}." 
+ ) + else: + policy_gpus_per_node = cluster_config["gpus_per_node"] + cluster = RayVirtualCluster( name="grpo_policy_cluster", - bundle_ct_per_node_list=[cluster_config["gpus_per_node"]] - * cluster_config["num_nodes"], + bundle_ct_per_node_list=[policy_gpus_per_node] * policy_nodes, use_gpus=True, - num_gpus_per_node=cluster_config["gpus_per_node"], + num_gpus_per_node=policy_gpus_per_node, max_colocated_worker_groups=1 if generation_config["backend"] == "megatron" else 2, ) train_cluster = cluster inference_cluster = cluster - print(f" ✓ Ray cluster initialized with {cluster_config['num_nodes']} nodes") + print( + f" ✓ Ray cluster for policy initialized with {policy_nodes} nodes", + flush=True, + ) else: assert generation_config["backend"] != "megatron", ( @@ -234,41 +351,58 @@ def setup( # train resources will be updated through overall and inference resources below train_gpus_per_node = cluster_config["gpus_per_node"] - train_nodes = cluster_config["num_nodes"] + train_nodes = policy_nodes inference_resources = generation_config["colocated"]["resources"] inference_gpus_per_node = inference_resources["gpus_per_node"] inference_nodes = inference_resources["num_nodes"] # validate and configure resources - if cluster_config["num_nodes"] == 1: - assert inference_gpus_per_node > 0, ( - "policy.generation.colocated.resources.gpus_per_node must be > 0 " - "when cluster.num_nodes = 1 and inference is non-colocated, " + if policy_nodes == 1: + # When policy_nodes == 1, train and inference are on the same node + assert ( + inference_gpus_per_node is not None and inference_gpus_per_node > 0 + ), ( + "policy.generation.colocated.resources.gpus_per_node must be explicitly set to a value > 0 " + "when policy_nodes = 1 and inference is non-colocated, " f"but got {inference_gpus_per_node}." 
) assert inference_nodes is None or inference_nodes == 1, ( "policy.generation.colocated.resources.num_nodes must be 1 or set to null " - "when cluster.num_nodes = 1 and inference is non-colocated, " + "when policy_nodes = 1 and inference is non-colocated, " f"but got {inference_nodes}." ) + inference_nodes = 1 - train_gpus_per_node -= inference_gpus_per_node + # If total_nodes == 1, reward model is also on the same node; otherwise it's on a different node + reward_gpus_to_subtract = ( + rm_gpus_per_node if total_nodes == 1 and reward_model_enabled else 0 + ) + train_gpus_per_node -= inference_gpus_per_node + reward_gpus_to_subtract + assert train_gpus_per_node > 0, ( + "No enough GPUs for training, " + f"train_gpus_per_node:{train_gpus_per_node} = cluster_config['gpus_per_node']:{cluster_config['gpus_per_node']} - inference_gpus_per_node:{inference_gpus_per_node}" + + ( + f" - rm_gpus_per_node:{rm_gpus_per_node}" + if total_nodes == 1 and reward_model_enabled + else "" + ) + ) else: + # train, inference, and reward model are all on different nodes assert inference_nodes > 0, ( "policy.generation.colocated.resources.num_nodes must be > 0 " "when cluster.num_nodes > 1 and inference is non-colocated, " f"but got {inference_nodes}." ) assert ( - inference_gpus_per_node is None - or inference_gpus_per_node == cluster_config["gpus_per_node"] + inference_gpus_per_node is not None + and inference_gpus_per_node == cluster_config["gpus_per_node"] ), ( - "policy.generation.colocated.resources.gpus_per_node must be equal to cluster.gpus_per_node or set to null " + "policy.generation.colocated.resources.gpus_per_node must be explicitly set and equal to cluster.gpus_per_node " "when cluster.num_nodes > 1 and inference is non-colocated, " - f"but got {inference_gpus_per_node}." + f"but got inference_gpus_per_node={inference_gpus_per_node}, cluster.gpus_per_node={cluster_config['gpus_per_node']}." 
) - inference_gpus_per_node = cluster_config["gpus_per_node"] train_nodes -= inference_nodes # initialize train cluster @@ -280,7 +414,8 @@ def setup( max_colocated_worker_groups=1, ) print( - f" ✓ Ray train cluster initialized with {train_nodes} nodes with {train_gpus_per_node} GPUs per node" + f" ✓ Ray train cluster initialized with {train_nodes} nodes with {train_gpus_per_node} GPUs per node", + flush=True, ) # initialize inference cluster @@ -292,62 +427,153 @@ def setup( max_colocated_worker_groups=1, ) print( - f" ✓ Ray inference cluster initialized with {inference_nodes} nodes with {inference_gpus_per_node} GPUs per node" + f" ✓ Ray inference cluster initialized with {inference_nodes} nodes with {inference_gpus_per_node} GPUs per node", + flush=True, ) # ========================== # Training and Inference # ========================== - print("\n▶ Setting up model and training...") + print("\n▶ Setting up model and training...", flush=True) - # vllm model loading prefers clean environment, initialize policy_generation before policy (#52 will fix this) + # vllm model loading prefers clean environment, initialize policy_generation before policy in colocated mode backend = generation_config["backend"] generation_config["model_name"] = policy_config["model_name"] # Needed for vLLM + # Dictionary to store worker initialization timing stats for logging + worker_init_timing_metrics = {} + + # Prepare checkpoint paths + if last_checkpoint_path: + weights_path = Path(last_checkpoint_path) / "policy" / "weights" + optimizer_path = Path(last_checkpoint_path) / "policy" / "optimizer" + else: + weights_path = None + optimizer_path = None + + if policy_config.get("megatron_cfg", {}).get("enabled", False): + ## NOTE: this is equal to the total number of scheduler steps + total_train_iters = min( + grpo_config["max_num_steps"], + grpo_config["max_num_epochs"] * len(dataloader), + ) + policy_config["megatron_cfg"]["train_iters"] = total_train_iters + + # Define initialization 
functions that will be used in all paths + def init_policy(): + """Initialize policy training workers.""" + t0 = time.perf_counter() + p = Policy( + cluster=train_cluster, + config=policy_config, + tokenizer=tokenizer, + processor=processor, + weights_path=weights_path, + optimizer_path=optimizer_path, + init_optimizer=True, + ) + return p, time.perf_counter() - t0 + + def init_vllm(): + """Initialize vLLM generation workers.""" + t0 = time.perf_counter() + pg = VllmGeneration(cluster=inference_cluster, config=generation_config) + pg.finish_generation() + return pg, time.perf_counter() - t0 + + # Handle backend-specific setup if backend == "megatron": + # Megatron backend: policy_generation is None, only initialize policy policy_generation = None print( - f" ✓ Using {backend} backend for generation with {policy_config['model_name']}" + f" ✓ Using {backend} backend for generation with {policy_config['model_name']}", + flush=True, ) + + policy, policy_time = init_policy() + worker_init_timing_metrics["policy_init_time_s"] = policy_time + elif backend == "vllm": + # vLLM backend: setup config, then decide parallel vs sequential init generation_config = cast(VllmConfig, generation_config) - policy_generation = VllmGeneration( - cluster=inference_cluster, config=generation_config + if generation_config["vllm_cfg"]["precision"] == "fp8": + assert loss_config["use_importance_sampling_correction"] is True, ( + "Importance sampling must be enabled for vLLM FP8 generation for good convergence!" + ) + generation_config["vllm_cfg"]["hf_overrides"] = policy_config.get( + "hf_config_overrides", {} ) - # Worker groups are not initialized until the first call to run something on workergroups. - # vllm 0.8 fails in initialization if its called in the first training step since it has no clean view of the GPU memory (HF is sharing the same memory). 
- policy_generation.finish_generation() + + # Determine if parallel initialization is possible (non-colocated mode) + use_parallel_init = not colocated_inference + + if use_parallel_init: + # Parallel initialization: vLLM and Policy can initialize simultaneously + print( + " ⚡ Using parallel worker initialization (non-colocated mode)", + flush=True, + ) + + # Execute both initializations in parallel + parallel_start_time = time.perf_counter() + with ThreadPoolExecutor(max_workers=2) as executor: + vllm_future = executor.submit(init_vllm) + policy_future = executor.submit(init_policy) + policy_generation, vllm_time = vllm_future.result() + policy, policy_time = policy_future.result() + parallel_wall_time = time.perf_counter() - parallel_start_time + + # Store timing metrics + worker_init_timing_metrics["vllm_init_time_s"] = vllm_time + worker_init_timing_metrics["policy_init_time_s"] = policy_time + worker_init_timing_metrics["parallel_wall_time_s"] = parallel_wall_time + worker_init_timing_metrics["parallel_init_enabled"] = True + + else: + # Sequential initialization: colocated mode (GPU memory requires vLLM first) + print( + " ⚙️ Using sequential worker initialization (colocated mode)", + flush=True, + ) + + # Initialize vLLM first (clean GPU memory), then policy + policy_generation, vllm_time = init_vllm() + worker_init_timing_metrics["vllm_init_time_s"] = vllm_time + + policy, policy_time = init_policy() + worker_init_timing_metrics["policy_init_time_s"] = policy_time + worker_init_timing_metrics["parallel_init_enabled"] = 0.0 + print( - f" ✓ Using vLLM backend for generation with {policy_config['model_name']}" + f" ✓ Using vLLM backend for generation with {policy_config['model_name']}", + flush=True, ) - if last_checkpoint_path: - weights_path = Path(last_checkpoint_path) / "policy" / "weights" - optimizer_path = Path(last_checkpoint_path) / "policy" / "optimizer" - else: - weights_path = None - optimizer_path = None + # Record when worker initialization 
completes (for calculating other setup time) + worker_init_complete_time = time.perf_counter() - setup_start_time - policy = Policy( - cluster=train_cluster, - config=policy_config, - tokenizer=tokenizer, - weights_path=weights_path, - optimizer_path=optimizer_path, - init_optimizer=True, - ) + # print the node IP and GPU ID of the policy workers for debugging + policy.print_node_ip_and_gpu_id() # if it is not colocated inference, initialize collective communication for update weights if not colocated_inference: + t0 = time.perf_counter() ip, port = train_cluster.get_master_address_and_port() - print(f"Using ip: {ip}, port: {port} for collective communication") - # inference cluster + head node of the train cluster - world_size = inference_nodes * inference_gpus_per_node + 1 + print(f"Using ip: {ip}, port: {port} for collective communication", flush=True) + # world includes all training workers and all inference workers + train_world_size = train_cluster.world_size() + inference_world_size = inference_nodes * inference_gpus_per_node + world_size = train_world_size + inference_world_size # init collective - futures_train = policy.init_collective(ip, port, world_size) - futures_inference = policy_generation.init_collective(ip, port, world_size) # type: ignore + futures_train = policy.init_collective( + ip, port, world_size, train_world_size=train_world_size + ) + futures_inference = policy_generation.init_collective( + ip, port, world_size, train_world_size=train_world_size + ) # type: ignore # wait for all futures to complete ray.get(futures_train + futures_inference) + worker_init_timing_metrics["collective_init_time_s"] = time.perf_counter() - t0 # prepare refit info state_dict_info = policy.prepare_refit_info() @@ -355,9 +581,38 @@ def setup( loss_fn = ClippedPGLossFn(loss_config) + # Calculate total setup time + total_setup_time = time.perf_counter() - setup_start_time + worker_init_timing_metrics["total_setup_time_s"] = total_setup_time + + # Log worker 
initialization timing metrics to logger + if worker_init_timing_metrics: + print("\n▶ Worker Initialization Timing:") + + vllm_time = worker_init_timing_metrics.get("vllm_init_time_s", 0) + policy_time = worker_init_timing_metrics.get("policy_init_time_s", 0) + total_setup = worker_init_timing_metrics.get("total_setup_time_s", 0) + + if vllm_time: + print(f" vLLM init: {vllm_time:.1f}s") + + if policy_time: + print(f" Policy init: {policy_time:.1f}s") + + # Calculate "other" time (time after worker init completes) + other_time = total_setup - worker_init_complete_time + worker_init_timing_metrics["other_setup_time_s"] = other_time + print(f" Other setup: {other_time:.1f}s") + + print(f" Total setup: {total_setup:.1f}s") + + # Log all metrics to the logger for analysis + logger.log_metrics(worker_init_timing_metrics, step=0, prefix="timing/setup") + print("\n" + "=" * 60) print(" " * 18 + "SETUP COMPLETE") - print("=" * 60 + "\n") + print(f" Total setup time: {total_setup_time:.1f}s") + print("=" * 60 + "\n", flush=True) return ( policy, @@ -378,6 +633,203 @@ def setup( # =============================================================================== +def normalize_advantages_with_epsilon( + advantages: torch.Tensor, + std: torch.Tensor, + epsilon: float = 1e-6, +) -> torch.Tensor: + """Normalize advantages by standard deviation, skipping samples with zero std. + + When std is exactly zero (from leave-one-out baseline with identical rewards), + normalization is skipped for those samples to prevent numerical instability. + This makes normalize_rewards compatible with use_leave_one_out_baseline. 
+ + Args: + advantages: Tensor of shape (batch_size, 1) containing advantage values + std: Tensor of shape (batch_size,) containing standard deviation values + epsilon: Small value to avoid division by very small std, defaults to 1e-6 + + Returns: + Normalized advantages tensor of same shape as input advantages + """ + # Only normalize where std > 0 to avoid division by near-zero + non_zero_std_mask = std > 0 + advantages[non_zero_std_mask] = advantages[non_zero_std_mask] / ( + std.unsqueeze(-1)[non_zero_std_mask] + epsilon + ) + return advantages + + +def dynamic_sampling( + repeated_batch: BatchedDataDict[DatumSpec], + std: torch.Tensor, + baseline: torch.Tensor, + dynamic_sampling_num_gen_batches: int, + master_config: MasterConfig, + timer: Timer, + batch_cache: BatchedDataDict[DatumSpec] = None, +) -> BatchedDataDict[DatumSpec]: + """Implements the dynamic sampling algorithm to select prompts with non-zero standard deviation. + + This function filters the current batch to retain only those prompts that have a non-zero standard deviation. + If the current batch has fewer number of prompts with non-zero standard deviation than the required batch size, defined as num_prompts_per_step * num_generations_per_prompt, + we store it in the batch_cache to be used in later iterations. + If the current batch has more number of prompts with non-zero standard deviation than the required batch size, defined as num_prompts_per_step * num_generations_per_prompt, + the batch is sliced to ensure batch size is num_prompts_per_step * num_generations_per_prompt. + is_batch_complete is set to False to indicate that the current batch is not enough to meet the required batch size. This is used as a signal in the GRPO training loop + to continue sampling or proceed to training. + This approach is based on the dynamic sampling algorithm from the DAPO paper: + https://arxiv.org/pdf/2503.14476. 
+ + Args: + repeated_batch (BatchedDataDict[DatumSpec]): The current batch of data containing prompts, responses, rewards, baselines, and std. + std (torch.Tensor): Tensor representing the standard deviation for each prompt group. + baseline (torch.Tensor): Baseline values for each prompt group. + dynamic_sampling_num_gen_batches (int): Number of generation batches processed at the current step. + master_config (MasterConfig): Configuration containing GRPO and policy settings. + batch_cache (BatchedDataDict[DatumSpec], optional): Cache storing previously selected prompts with non-zero std. + + Returns: + tuple: A tuple containing: + - repeated_batch (BatchedDataDict[DatumSpec]): Updated batch with selected prompts. + - is_batch_complete (bool): Indicates if the batch has enough samples with non-zero std for training. + - batch_cache (BatchedDataDict[DatumSpec]): Updated cache for future iterations. + """ + # is_batch_complete is used to indicate if the current batch was able to generate enough prompts with non-zero std. + is_batch_complete = True + + # Required batch size for training + train_prompts_size = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + # Store the baseline, std and total_reward for the current unfiltered batch. + repeated_batch["baseline"] = baseline + repeated_batch["std"] = std + total_rewards = repeated_batch["total_reward"] + dynamic_sampling_metrics = {} + + # Dynamic sampling algorithm (used in DAPO algorithm) + # This block implements dynamic sampling by selecting prompt groups with non-zero std. + # If sampled prompts (with non-zero std) are fewer than num_prompts_per_step * num_generations_per_prompt, continue sampling until dynamic_sampling_max_gen_batches is reached. 
+ if master_config["grpo"]["use_dynamic_sampling"]: + with timer.time("dynamic_sampling"): + # Get the prompt indices with non-zero std + non_zero_std_mask = std != 0.0 + + keep_prompt_indices = torch.arange( + len(non_zero_std_mask), device=std.device + )[non_zero_std_mask].tolist() + + # Only select the inputs that have non-zero std + # total_reward is already a part of repeated_batch so we don't need to add it again + filtered_repeated_batch = repeated_batch.select_indices(keep_prompt_indices) + filtered_repeated_batch["std"] = std[keep_prompt_indices] + filtered_repeated_batch["baseline"] = baseline[keep_prompt_indices] + + # Store filtered and total rewards to track them separately + filtered_rewards = filtered_repeated_batch["total_reward"] + filtered_repeated_batch["total_reward"] = total_rewards + filtered_repeated_batch["filtered_reward"] = filtered_rewards + + # Store the total_reward for the current filtered batch. + # If none of the prompts in current batch have non-zero std, filtered_repeated_batch.size will be 0. + # In this case, the current batch will be ignored and the next batch will be processed and we generate responses for it. + if filtered_repeated_batch.size > 0: + # Concatenate the previous partially filled batch with the current batch. This serves as a cache to store and collect the prompts with non-zero std. + # This is used in the next iteration when the current batch is not enough to fill the buffer. + batch_cache = ( + filtered_repeated_batch + if batch_cache is None + else BatchedDataDict.from_batches( + [batch_cache, filtered_repeated_batch] + ) + ) + filtered_repeated_batch = batch_cache + + filtered_prompts_size = filtered_repeated_batch.size + print( + f"Detected {filtered_prompts_size} prompts with non-zero std; " + f"{train_prompts_size} are required and used for training." 
+ ) + + # If the generation samples size is smaller than a fixed threshold (train_prompts_size), keep generating by processing the next batch + if filtered_prompts_size < train_prompts_size: + dynamic_sampling_max_gen_batches = master_config["grpo"][ + "dynamic_sampling_max_gen_batches" + ] + assert dynamic_sampling_max_gen_batches > 0, ( + "When using grpo.use_dynamic_sampling, grpo.dynamic_sampling_max_gen_batches must be > 0" + ) + if dynamic_sampling_num_gen_batches <= dynamic_sampling_max_gen_batches: + print( + f"Generation sample buffer size: {filtered_prompts_size} is smaller than train_prompts_size: {train_prompts_size}. Processed {dynamic_sampling_num_gen_batches} batches so far out of {dynamic_sampling_max_gen_batches}." + ) + is_batch_complete = False + else: + raise ValueError( + f"Dynamic sampling has reached the maximum allowed number of batches ({dynamic_sampling_max_gen_batches}). Consider evaluating the complexity of your data or adjusting the num_prompts_per_step or num_generations_per_prompt parameters to enhance the diversity of the samples." + ) + else: + num_discarded_valid_samples = filtered_prompts_size - train_prompts_size + dynamic_sampling_metrics[ + "dynamic_sampling_num_discarded_valid_samples" + ] = num_discarded_valid_samples + + # Slice the batch, rewards, baselines and std to ensure batch size is train_prompts_size + filtered_repeated_batch = filtered_repeated_batch.slice( + 0, train_prompts_size + ) + + batch_to_return = ( + filtered_repeated_batch + if master_config["grpo"]["use_dynamic_sampling"] + else repeated_batch + ) + return batch_to_return, is_batch_complete, batch_cache, dynamic_sampling_metrics + + +def scale_rewards( + repeated_batch: BatchedDataDict[DatumSpec], reward_scaling_cfg: RewardScalingConfig +) -> BatchedDataDict[DatumSpec]: + """Linearly scales rewards from a source range to a target range. 
+ + If `reward_scaling.enabled` is True, each reward in `repeated_batch["total_reward"]` + is clamped to the configured source interval [source_min, source_max] and then + rescaled to the target interval [target_min, target_max]. + + Default configuration: + source_min = 0.0 + source_max = 1.0 + target_min = 0.0 + target_max = 1.0 + """ + if reward_scaling_cfg["enabled"]: + rewards = repeated_batch["total_reward"] + source_min = float(reward_scaling_cfg["source_min"]) + source_max = float(reward_scaling_cfg["source_max"]) + target_min = float(reward_scaling_cfg["target_min"]) + target_max = float(reward_scaling_cfg["target_max"]) + + # Detect out-of-range values + out_of_range_mask = (rewards < source_min) | (rewards > source_max) + if torch.any(out_of_range_mask): + print( + f"[reward_scaling] WARNING: {int(out_of_range_mask.sum())} rewards " + f"are outside the configured source range [{source_min}, {source_max}]. " + f"Values will be clipped before scaling." + ) + + # Clamp and scale + rewards = torch.clamp(rewards, min=source_min, max=source_max) + scaled_rewards = target_min + (rewards - source_min) / ( + source_max - source_min + ) * (target_max - target_min) + repeated_batch["total_reward"] = scaled_rewards + + return repeated_batch + + def _should_use_async_rollouts(master_config: MasterConfig) -> bool: """Determine if async rollouts should be used based on the configuration. 
@@ -395,11 +847,35 @@ def _should_use_async_rollouts(master_config: MasterConfig) -> bool: return vllm_cfg.get("async_engine", False) +def _should_use_penguin(master_config: MasterConfig) -> bool: + """Determine if Penguin should be used for rollouts and validation based on the configuration.""" + env_config = master_config.get("env") or dict() + should_use_penguin = bool(env_config.get("should_use_penguin")) + if not should_use_penguin: + return should_use_penguin + + # Validate the setup for training with Penguin + assert _should_use_async_rollouts(master_config), ( + "❌ Error: In order to use Penguin, you must use vllm generation backend with `async_engine: true`!" + ) + + generation_config = master_config["policy"]["generation"] + + # We piggyback off of `_should_use_async_rollouts` to guarantee the existence of these configs. + should_expose_http_server = generation_config["vllm_cfg"].get("expose_http_server") + assert should_expose_http_server, ( + "In order to use Penguin, you must expose the vllm server via `expose_http_server: true`!" + ) + + return should_use_penguin + + def refit_policy_generation( policy: ColocatablePolicyInterface, policy_generation: GenerationInterface, colocated_inference: bool, _refit_buffer_size_gb: Optional[int] = None, + timer: Optional[Timer] = None, ) -> None: """Refit the policy generation interface with the latest policy weights. @@ -409,48 +885,59 @@ def refit_policy_generation( _refit_buffer_size_gb: The size of the buffer to use for refitting. If it is None, the buffer size will be computed by the remaining memory. This parameter is primarily used for testing. 
+ timer: Optional Timer used to time the prepare/transfer/update phase """ if colocated_inference: policy.offload_before_refit() policy_generation.prepare_for_generation(tags=["weights"]) - # update weights - update_success = False - if colocated_inference: - # get model param keys, which is grouped by size - grouped_param_keys = policy.prepare_weights_for_ipc( - _refit_buffer_size_gb=_refit_buffer_size_gb - ) - total_num_keys = sum(len(k) for k in grouped_param_keys) - print( - f"[Refit] Split {total_num_keys} keys into {len(grouped_param_keys)} groups" - ) - # do update - for keys in grouped_param_keys: - ipc_handles = policy.get_weights_ipc_handles(keys) - update_success = policy_generation.update_weights_from_ipc_handles( - ipc_handles + # Create a context manager that does nothing when timer is None + timer_context = ( + timer.time("prepare_for_generation/transfer_and_update_weights") + if timer is not None + else nullcontext() + ) + with timer_context: + # update weights + update_success = False + if colocated_inference: + # get model param keys, which is grouped by size + if _refit_buffer_size_gb is not None: + buffer_size_bytes = _refit_buffer_size_gb * (1024**3) + else: + # Empirically sets ratio as 30% to maximize efficiency. + # The remaining 70% is a necessary buffer reserved for the parameter all-gathering across the expert-parallelism dimension. 
+ memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.3") + buffer_size_bytes = int( + policy.get_free_memory_bytes() * float(memory_ratio) + ) + + futures_train = policy.stream_weights_via_ipc_zmq( + buffer_size_bytes=buffer_size_bytes ) - if not update_success: - break - else: - # update weights through nccl - futures_train = policy.broadcast_weights_for_collective() - futures_inference = policy_generation.update_weights_from_collective() - # wait for all futures to complete - ray.get(futures_train) - results = ray.get(futures_inference) - update_success = all(result for result in results if result is not None) - - # check if update is successful - if not update_success: - error_tag = "cuda-ipc" if colocated_inference else "nccl" - error_message = ( - "❌ Error: Updating weights for the generation policy failed during refit.\n" - f"This often indicates an issue with {error_tag} or " - "a problem within the generation backend (e.g., vLLM worker).\n" - ) - raise RuntimeError(error_message) + futures_inference = policy_generation.update_weights_via_ipc_zmq() + # wait for all futures to complete + ray.get(futures_train) + results = ray.get(futures_inference) + update_success = all(result for result in results if result is not None) + else: + # update weights through nccl + futures_train = policy.broadcast_weights_for_collective() + futures_inference = policy_generation.update_weights_from_collective() + # wait for all futures to complete + ray.get(futures_train) + results = ray.get(futures_inference) + update_success = all(result for result in results if result is not None) + + # check if update is successful + if not update_success: + error_tag = "cuda-ipc" if colocated_inference else "nccl" + error_message = ( + "❌ Error: Updating weights for the generation policy failed during refit.\n" + f"This often indicates an issue with {error_tag} or " + "a problem within the generation backend (e.g., vLLM worker).\n" + ) + raise RuntimeError(error_message) if 
colocated_inference: policy.offload_after_refit() @@ -475,9 +962,16 @@ def grpo_train( checkpointer: CheckpointManager, grpo_save_state: GRPOSaveState, master_config: MasterConfig, + processor: Optional[AutoProcessor] = None, ) -> None: """Run GRPO training algorithm.""" timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() + NEED_REFIT = True # If policy_generation is None, use the policy as the generation interface (megatron framework backend) if policy_generation is None: @@ -487,15 +981,28 @@ def grpo_train( assert policy_generation is not None # for mypy type check # common config/state itmes - step = grpo_save_state["step"] - consumed_samples = grpo_save_state["consumed_samples"] - val_period = master_config["grpo"]["val_period"] + current_step = grpo_save_state["current_step"] # current step within an epoch + total_steps = grpo_save_state["total_steps"] # total steps across all epochs + max_num_steps = master_config["grpo"][ + "max_num_steps" + ] # max number of steps to train for + current_epoch = grpo_save_state["current_epoch"] # current epoch + max_num_epochs = master_config["grpo"][ + "max_num_epochs" + ] # max number of epochs to train for + consumed_samples = grpo_save_state[ + "consumed_samples" + ] # total samples consumed across all epochs + total_valid_tokens = grpo_save_state.get( + "total_valid_tokens", 0 + ) # total valid tokens processed across all epochs; default to 0 for backward compatibility with older checkpoints val_at_start = master_config["grpo"]["val_at_start"] + val_period = master_config["grpo"]["val_period"] colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] # Run validation at the start if configured - if val_at_start and step == 0: - print("\n🔍 Running initial validation...") + if val_at_start and current_step == 0: + print("\n🔍 Running initial validation...", flush=True) if 
NEED_REFIT and POLICY_GENERATION_STALE: refit_policy_generation(policy, policy_generation, colocated_inference) POLICY_GENERATION_STALE = False @@ -510,305 +1017,518 @@ def grpo_train( master_config=master_config, ) policy_generation.finish_generation() - logger.log_metrics(val_metrics, step, prefix="validation") - logger.log_metrics(validation_timings, step, prefix="timing/validation") + logger.log_metrics(val_metrics, current_step, prefix="validation") + logger.log_metrics(validation_timings, current_step, prefix="timing/validation") + + while current_epoch < max_num_epochs and total_steps < max_num_steps: + print(f"\n{'=' * 25} Epoch {current_epoch + 1}/{max_num_epochs} {'=' * 25}") + # batch cache is used for DAPO. We store prompts with non-zero standard deviation in this cache. + batch_cache: BatchedDataDict[DatumSpec] = None + # This is the number of batches we processed so far at each step to generate responses whose std is non-zero. Maximum threshold is set by dynamic_sampling_max_gen_batches. Used in the case of dynamic sampling. 
+ dynamic_sampling_num_gen_batches = 0 + + # Run grpo/dapo training loop (single-turn) + for batch in dataloader: + print( + f"\n{'=' * 25} Step {current_step + 1}/{min(len(dataloader), max_num_steps)} {'=' * 25}", + flush=True, + ) + maybe_gpu_profile_step(policy, total_steps + 1) + if policy != policy_generation: + maybe_gpu_profile_step(policy_generation, total_steps + 1) + val_metrics, validation_timings = None, None + + with timer.time("total_step_time"): + # Prepare batch + print("▶ Preparing batch...", flush=True) + with timer.time("data_processing"): + # Repeat batch items + repeated_batch: BatchedDataDict[DatumSpec] = ( + batch.repeat_interleave( + master_config["grpo"]["num_generations_per_prompt"] + ) + ) + # Convert LLMMessageLogType to FlatMessagesType for generation + batched_flat, input_lengths = batched_message_log_to_flat_message( + repeated_batch["message_log"], + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + ) + input_ids = batched_flat["token_ids"] - # Run grpo training (single-turn) - batch: BatchedDataDict[DatumSpec] - for batch in dataloader: - print( - f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}" - ) - maybe_gpu_profile_step(policy, step + 1) - if policy != policy_generation: - maybe_gpu_profile_step(policy_generation, step + 1) - val_metrics, validation_timings = None, None - - with timer.time("total_step_time"): - # Prepare batch - print("▶ Preparing batch...") - with timer.time("data_processing"): - # Repeat batch items - repeated_batch: BatchedDataDict[DatumSpec] = batch.repeat_interleave( - master_config["grpo"]["num_generations_per_prompt"] + # Generate responses - this updates the LLMMessageLogType in repeated_batch + print( + f"▶ Generating responses for batch of size {repeated_batch.size}...", + flush=True, ) - # Convert LLMMessageLogType to FlatMessagesType for generation - batched_flat, input_lengths = batched_message_log_to_flat_message( - 
repeated_batch["message_log"], - pad_value_dict={"token_ids": tokenizer.pad_token_id}, + with timer.time("prepare_for_generation/total"): + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + policy, policy_generation, colocated_inference, timer=timer + ) + POLICY_GENERATION_STALE = False + else: + if colocated_inference: + policy.offload_after_refit() # unload optimizer to make space for generation + policy_generation.prepare_for_generation() + + dynamic_sampling_num_gen_batches += 1 + with timer.time("generation"): + # Use penguin rollouts if enabled. We cascade penguin first since penguin requires async rollouts. + if _should_use_penguin(master_config): + generation_config = master_config["policy"]["generation"] + penguin_rollout_result = run_async_penguin_rollout( + policy_generation=policy_generation, + input_batch=repeated_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=None, + generation_config=generation_config, + max_rollout_turns=None, + greedy=False, + ) + input_ids = penguin_rollout_result.input_ids + repeated_batch = penguin_rollout_result.final_batch + rollout_metrics = penguin_rollout_result.rollout_metrics + # Use async rollouts if vLLM async engine is enabled + elif _should_use_async_rollouts(master_config): + ( + repeated_batch, + rollout_metrics, + ) = run_async_multi_turn_rollout( + policy_generation=policy_generation, + input_batch=repeated_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=master_config["policy"][ + "max_total_sequence_length" + ], + max_rollout_turns=master_config["grpo"][ + "max_rollout_turns" + ], + greedy=False, + ) + else: + repeated_batch, rollout_metrics = run_multi_turn_rollout( + policy_generation=policy_generation, + input_batch=repeated_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=master_config["policy"][ + "max_total_sequence_length" + ], + max_rollout_turns=master_config["grpo"][ + "max_rollout_turns" + ], + greedy=False, + ) 
+ policy_generation.finish_generation() + + repeated_batch = scale_rewards( + repeated_batch, master_config["grpo"]["reward_scaling"] ) - input_ids = batched_flat["token_ids"] - - # Generate responses - this updates the LLMMessageLogType in repeated_batch - print(f"▶ Generating responses for batch of size {repeated_batch.size}...") - with timer.time("prepare_for_generation"): - if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation( - policy, policy_generation, colocated_inference + # Process rewards with custom reward function + if master_config["grpo"]["reward_shaping"]["enabled"]: + repeated_batch = apply_reward_shaping( + repeated_batch, master_config["grpo"]["reward_shaping"] ) - POLICY_GENERATION_STALE = False - else: - policy_generation.prepare_for_generation() - - with timer.time("generation"): - # Use async rollouts if vLLM async engine is enabled - if _should_use_async_rollouts(master_config): - ( - repeated_batch, - rollout_metrics, - ) = run_async_multi_turn_rollout( - policy_generation=policy_generation, - input_batch=repeated_batch, - tokenizer=tokenizer, - task_to_env=task_to_env, - max_seq_len=master_config["policy"][ - "max_total_sequence_length" + + # Calculate rewards & advantages + print("▶ Processing rewards...,", flush=True) + with timer.time("reward_calculation"): + # Extract rewards from final_batch + rewards = repeated_batch["total_reward"] + + print("▶ Computing advantages...", flush=True) + baseline, std = calculate_baseline_and_std_per_prompt( + input_ids, + rewards, + torch.ones_like(rewards), + leave_one_out_baseline=master_config["grpo"][ + "use_leave_one_out_baseline" ], - max_rollout_turns=master_config["grpo"]["max_rollout_turns"], - greedy=False, ) - else: - repeated_batch, rollout_metrics = run_multi_turn_rollout( - policy_generation=policy_generation, - input_batch=repeated_batch, - tokenizer=tokenizer, - task_to_env=task_to_env, - max_seq_len=master_config["policy"][ - "max_total_sequence_length" - ], - 
max_rollout_turns=master_config["grpo"]["max_rollout_turns"], - greedy=False, + # Apply dynamic sampling to filter prompts with non-zero std (DAPO algorithm) + repeated_batch, is_batch_complete, batch_cache, ds_metrics = ( + dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + batch_cache, + ) ) - policy_generation.finish_generation() - - # Calculate rewards & advantages - print("▶ Processing rewards...") - with timer.time("reward_calculation"): - # Extract rewards from final_batch - rewards = repeated_batch["total_reward"] - - print("▶ Computing advantages...") - baseline, std = calculate_baseline_and_std_per_prompt( - input_ids, - rewards, - torch.ones_like(rewards), - leave_one_out_baseline=master_config["grpo"][ - "use_leave_one_out_baseline" - ], - ) - advantages = (rewards - baseline).unsqueeze(-1) - - if master_config["grpo"]["normalize_rewards"]: - # don't sharpen the ones with no variation - zero_std_mask = std > 0 - advantages[zero_std_mask] = ( - advantages[zero_std_mask] / std.unsqueeze(-1)[zero_std_mask] + if ds_metrics: + ds_metrics["dynamic_sampling_num_gen_batches"] = ( + dynamic_sampling_num_gen_batches + ) + # Get the updated rewards and baselines. For DAPO, these rewards and baselines only correspond to the prompts with non-zero std. + rewards = ( + repeated_batch["total_reward"] + if not master_config["grpo"]["use_dynamic_sampling"] + else repeated_batch["filtered_reward"] ) + baseline = repeated_batch["baseline"] + std = repeated_batch["std"] + + # If the current batch is not enough to fill the buffer during dynamic sampling, we update the cache and process the next batch. 
+ if not is_batch_complete: + continue + advantages = (rewards - baseline).unsqueeze(-1) + + if master_config["grpo"]["normalize_rewards"]: + advantages = normalize_advantages_with_epsilon( + advantages=advantages, + std=std, + ) - with timer.time("data_processing"): - # Add loss mask and advantages to each message in LLMMessageLogType - for i, message_log in enumerate(repeated_batch["message_log"]): - for j, message in enumerate(message_log): - if message["role"] == "assistant": - message["token_loss_mask"] = torch.ones_like( - message["token_ids"] - ) - else: - message["token_loss_mask"] = torch.zeros_like( - message["token_ids"] + with timer.time("data_processing"): + use_overlong_filtering = master_config["grpo"]["overlong_filtering"] + if use_overlong_filtering: + loss_multiplier = repeated_batch["loss_multiplier"].clone() + truncated = repeated_batch["truncated"] + + if isinstance(truncated, list): + truncated = torch.tensor(truncated, dtype=torch.bool) + + loss_multiplier[truncated] = 0 + repeated_batch["loss_multiplier"] = loss_multiplier + # Add loss mask and advantages to each message in LLMMessageLogType + for i, message_log in enumerate(repeated_batch["message_log"]): + for j, message in enumerate(message_log): + if message["role"] == "assistant": + message["token_loss_mask"] = torch.ones_like( + message["token_ids"] + ) + else: + message["token_loss_mask"] = torch.zeros_like( + message["token_ids"] + ) + if "generation_logprobs" not in message: + message["generation_logprobs"] = torch.zeros_like( + message["token_ids"], dtype=torch.float32 + ) + message["advantages"] = advantages[i].expand( + message["token_ids"].shape ) - if "generation_logprobs" not in message: - message["generation_logprobs"] = torch.zeros_like( - message["token_ids"], dtype=torch.float32 - ) - message["advantages"] = advantages[i].expand( - message["token_ids"].shape - ) - # Convert updated LLMMessageLogType to FlatMessagesType for training - flat_messages, input_lengths = 
batched_message_log_to_flat_message( - repeated_batch["message_log"], - pad_value_dict={"token_ids": tokenizer.pad_token_id}, - make_sequence_length_divisible_by=master_config["policy"][ - "make_sequence_length_divisible_by" - ], - ) + # Convert updated LLMMessageLogType to FlatMessagesType for training + flat_messages, input_lengths = batched_message_log_to_flat_message( + repeated_batch["message_log"], + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + make_sequence_length_divisible_by=master_config["policy"][ + "make_sequence_length_divisible_by" + ], + ) - # Create training data from flattened messages - train_data = BatchedDataDict[ClippedPGLossDataDict]( - { - "input_ids": flat_messages["token_ids"], - "input_lengths": input_lengths, - "advantages": flat_messages["advantages"], - "generation_logprobs": flat_messages["generation_logprobs"], - "token_mask": flat_messages["token_loss_mask"], - "sample_mask": repeated_batch["loss_multiplier"], - } + # Create training data from flattened messages + train_data = BatchedDataDict[ClippedPGLossDataDict]( + { + "input_ids": flat_messages["token_ids"], + "input_lengths": input_lengths, + "advantages": flat_messages["advantages"], + "generation_logprobs": flat_messages["generation_logprobs"], + "token_mask": flat_messages["token_loss_mask"], + "sample_mask": repeated_batch["loss_multiplier"], + } + ) + # this will be mini-batched inside the policy, so maintain the packed multimodal structure + train_data.update( + flat_messages.get_multimodal_dict(as_tensors=False) + ) + train_data.to("cpu") + + print("▶ Preparing for logprob inference...", flush=True) + with timer.time("logprob_inference_prep"): + policy.prepare_for_lp_inference() + + print("▶ Computing logprobs...", flush=True) + with timer.time("policy_and_reference_logprobs"): + fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] + reference_logprobs = policy.get_reference_policy_logprobs( + train_data + )["reference_logprobs"] + 
train_data["prev_logprobs"] = fprop_logprobs + train_data["reference_policy_logprobs"] = reference_logprobs + + print("▶ Preparing for training...", flush=True) + with timer.time("training_prep"): + policy.prepare_for_training() # set model train and reload optim to GPU + POLICY_GENERATION_STALE = True + + print("▶ Training policy...", flush=True) + with timer.time("policy_training"): + train_results = policy.train(train_data, loss_fn) + + is_last_step = (total_steps + 1 >= max_num_steps) or ( + (current_epoch + 1 == max_num_epochs) + and (current_step + 1 == len(dataloader)) ) - train_data.to("cpu") - - print("▶ Preparing for logprob inference...") - with timer.time("logprob_inference_prep"): - policy.prepare_for_lp_inference() - print("▶ Computing logprobs...") - with timer.time("policy_and_reference_logprobs"): - fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] - reference_logprobs = policy.get_reference_policy_logprobs(train_data)[ - "reference_logprobs" - ] - train_data["prev_logprobs"] = fprop_logprobs - train_data["reference_policy_logprobs"] = reference_logprobs + # Run validation if it's a validation step + if val_period > 0 and (total_steps + 1) % val_period == 0: + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + policy, policy_generation, colocated_inference + ) + POLICY_GENERATION_STALE = False + else: + if colocated_inference: + policy.offload_after_refit() # unload optimizer to make space for generation + policy_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + policy_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=total_steps + 1, + master_config=master_config, + ) + policy_generation.finish_generation() + logger.log_metrics( + validation_timings, total_steps + 1, prefix="timing/validation" + ) + logger.log_metrics( + val_metrics, total_steps + 1, prefix="validation" + ) - print("▶ Preparing for training...") - with timer.time("training_prep"): - 
policy.prepare_for_training() # set model train and reload optim to GPU - POLICY_GENERATION_STALE = True + # Get flat advantages and token mask for masked metrics computation + flat_advantages = flat_messages["advantages"] + flat_token_mask = flat_messages["token_loss_mask"] - print("▶ Training policy...") - with timer.time("policy_training"): - train_results = policy.train(train_data, loss_fn) + # Filter advantages using token mask (only valid response tokens) + response_advantages = torch.masked_select( + flat_advantages, flat_token_mask.bool() + ) - is_last_step = step + 1 == min( - master_config["grpo"]["max_num_steps"], len(dataloader) + metrics = { + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + "reward": rewards.numpy(), + "mean_prompt_length": repeated_batch["length"].numpy(), + "total_num_tokens": input_lengths.numpy(), + # Add masked advantages tracking metrics (only for valid response tokens) + "advantages/mean": torch.mean(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/max": torch.max(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/min": torch.min(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + **ds_metrics, + } + if master_config["grpo"]["use_dynamic_sampling"]: + metrics["filtered_reward"] = rewards.numpy() + metrics["reward"] = repeated_batch["total_reward"].numpy() + + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in { + "lr", + "wd", + "reward", + "filtered_reward", + "global_valid_seqs", + "global_valid_toks", + "mean_prompt_length", + }: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + + metrics.update(rollout_metrics) + total_valid_tokens += metrics["global_valid_toks"] + + ## Checkpointing + consumed_samples += master_config["grpo"]["num_prompts_per_step"] + timeout.mark_iteration() + + 
should_save_by_step = ( + is_last_step + or (total_steps + 1) % master_config["checkpointing"]["save_period"] + == 0 + ) + # +1 because step is 0-indexed + # Check if timeout-based checkpointing is enabled in config. + should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): + policy.prepare_for_training() + + # +1 because step is 0-indexed + grpo_save_state["current_step"] = current_step + 1 + grpo_save_state["total_steps"] = total_steps + 1 + grpo_save_state["current_epoch"] = current_epoch + grpo_save_state["total_valid_tokens"] = total_valid_tokens + if val_metrics is not None: + grpo_save_state["val_reward"] = val_metrics["accuracy"] + elif "val_reward" in grpo_save_state: + del grpo_save_state["val_reward"] + grpo_save_state["consumed_samples"] = consumed_samples + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" or "train" metrics dictionary.' + f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_reward --> 'val:reward'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: + warnings.warn( + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. 
" + "This checkpoint will not be saved as top-k.", + stacklevel=2, + ) + if full_metric_name in grpo_save_state: + del grpo_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + grpo_save_state[full_metric_name] = metrics_source[ + metric_name + ] + + with timer.time("checkpointing"): + print( + f"Saving checkpoint for step {total_steps + 1}...", + flush=True, + ) + checkpoint_path = checkpointer.init_tmp_checkpoint( + total_steps + 1, grpo_save_state, master_config + ) + policy.save_checkpoint( + weights_path=os.path.join( + checkpoint_path, "policy", "weights" + ), + optimizer_path=os.path.join( + checkpoint_path, "policy", "optimizer" + ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), + checkpointing_cfg=master_config["checkpointing"], + ) + torch.save( + dataloader.state_dict(), + os.path.join(checkpoint_path, "train_dataloader.pt"), + ) + checkpointer.finalize_checkpoint(checkpoint_path) + + # Logging + # Log training data + log_data = {"content": flat_messages["content"]} + log_data["rewards"] = rewards.tolist() + if master_config["grpo"]["use_dynamic_sampling"]: + log_data["filtered_rewards"] = rewards.tolist() + log_data["rewards"] = repeated_batch["total_reward"].tolist() + + log_data["generation_logprobs"] = train_data["generation_logprobs"].tolist() + log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist() + log_data["input_lengths"] = input_lengths.tolist() + logger.log_batched_dict_as_jsonl( + log_data, f"train_data_step{total_steps + 1}.jsonl" ) - # Run validation if it's a validation step - if val_period > 0 and (step + 1) % val_period == 0: - if NEED_REFIT and POLICY_GENERATION_STALE: - refit_policy_generation( - policy, policy_generation, colocated_inference - ) - POLICY_GENERATION_STALE = False - else: - policy_generation.prepare_for_generation() - val_metrics, validation_timings = validate( - 
policy_generation, - val_dataloader, - tokenizer, - val_task_to_env, - step=step + 1, - master_config=master_config, + timing_metrics: dict[str, float] = timer.get_timing_metrics( + reduction_op="sum" + ) # type: ignore + # track example with high token mult prob error above 1.05 + if metrics["token_mult_prob_error"] > 1.05: + logger.log_plot_token_mult_prob_error( + { + "prompt_lengths": repeated_batch["length"], + "full_lengths": input_lengths, + "generation_logprobs": train_data["generation_logprobs"], + "prev_logprobs": train_data["prev_logprobs"], + "token_mask": train_data["token_mask"], + "sample_mask": train_data["sample_mask"], + }, + total_steps + 1, + name="train/token_mult_prob_error_plot_sample", ) - policy_generation.finish_generation() - logger.log_metrics( - validation_timings, step + 1, prefix="timing/validation" + print("\n📊 Training Results:") + + print(f" • Loss: {metrics['loss']:.4f}") + print(f" • Generation KL Error: {metrics['gen_kl_error']:.4f}") + if master_config["grpo"]["use_dynamic_sampling"]: + print(f" • Avg Filtered Reward: {np.mean(rewards.numpy()):.4f}") + print( + f" • Avg Total Reward: {np.mean(repeated_batch['total_reward'].numpy()):.4f}" ) - logger.log_metrics(val_metrics, step + 1, prefix="validation") - - ## Checkpointing - consumed_samples += master_config["grpo"]["num_prompts_per_step"] - if master_config["checkpointing"]["enabled"] and ( - is_last_step - or (step + 1) % master_config["checkpointing"]["save_period"] == 0 - ): # +1 because step is 0-indexed - policy.prepare_for_training() - - grpo_save_state["step"] = step + 1 - if val_metrics is not None: - grpo_save_state["val_reward"] = val_metrics["accuracy"] - elif "val_reward" in grpo_save_state: - del grpo_save_state["val_reward"] - grpo_save_state["consumed_samples"] = consumed_samples - - if master_config["checkpointing"]["metric_name"] is not None: - if ( - master_config["checkpointing"]["metric_name"] - not in grpo_save_state - ): - warnings.warn( - f"You asked to 
save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " - "Saving most recent k checkpoints instead." - ) - master_config["checkpointing"]["metric_name"] = None - - with timer.time("checkpointing"): - print(f"Saving checkpoint for step {step + 1}...") - checkpoint_path = checkpointer.init_tmp_checkpoint( - step + 1, grpo_save_state, master_config - ) - policy.save_checkpoint( - weights_path=os.path.join(checkpoint_path, "policy", "weights"), - optimizer_path=os.path.join( - checkpoint_path, "policy", "optimizer" - ), - tokenizer_path=os.path.join( - checkpoint_path, "policy", "tokenizer" - ), - ) - torch.save( - dataloader.state_dict(), - os.path.join(checkpoint_path, "train_dataloader.pt"), - ) - checkpointer.finalize_checkpoint(checkpoint_path) - policy.offload_after_refit() - - # Logging - # Log training data - log_data = {"content": flat_messages["content"]} - log_data["rewards"] = rewards.tolist() - log_data["generation_logprobs"] = train_data["generation_logprobs"].tolist() - log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist() - log_data["input_lengths"] = input_lengths.tolist() - logger.log_batched_dict_as_jsonl(log_data, f"train_data_step{step}.jsonl") - - metrics = { - "loss": train_results["loss"].numpy(), - "reward": rewards.numpy(), - "grad_norm": train_results["grad_norm"].numpy(), - } - metrics.update(train_results["all_mb_metrics"]) - for k, v in metrics.items(): - if k in {"lr", "wd", "reward", "global_valid_seqs", "global_valid_toks"}: - metrics[k] = np.mean(v).item() else: - metrics[k] = np.sum(v).item() - metrics.update(rollout_metrics) - - timing_metrics: dict[str, float] = timer.get_timing_metrics(reduction_op="sum") # type: ignore - # track example with high token mult prob error above 1.05 - if metrics["token_mult_prob_error"] > 1.05: - logger.log_plot_token_mult_prob_error( - { - "prompt_lengths": repeated_batch["length"], - "full_lengths": input_lengths, - 
"generation_logprobs": train_data["generation_logprobs"], - "prev_logprobs": train_data["prev_logprobs"], - "token_mask": train_data["token_mask"], - "sample_mask": train_data["sample_mask"], - }, - step + 1, - name="train/token_mult_prob_error_plot_sample", + print(f" • Avg Reward: {np.mean(rewards.numpy()):.4f}") + print( + f" • Mean Generation Length: {rollout_metrics['mean_gen_tokens_per_sample']:.4f}", + flush=True, ) - print("\n📊 Training Results:") + print("\n⏱️ Timing:", flush=True) + # Display total time first, separately + total_time = timing_metrics.get("total_step_time", 0) - print(f" • Loss: {metrics['loss']:.4f}") - print(f" • Avg Reward: {np.mean(rewards.numpy()):.4f}") - print( - f" • Mean Generation Length: {rollout_metrics['mean_gen_tokens_per_sample']:.4f}" - ) + number_of_samples_per_step = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) - print("\n⏱️ Timing:") - # Display total time first, separately - total_time = timing_metrics.get("total_step_time", 0) - print(f" • Total step time: {total_time:.2f}s") + print(f" • Total step time: {total_time:.2f}s", flush=True) - # Display all other timing metrics - for k, v in sorted( - timing_metrics.items(), key=lambda item: item[1], reverse=True - ): - if k != "total_step_time": - percent = (v / total_time * 100) if total_time > 0 else 0 - print(f" • {k}: {v:.2f}s ({percent:.1f}%)") + # Display all other timing metrics + for k, v in sorted( + timing_metrics.items(), key=lambda item: item[1], reverse=True + ): + if k != "total_step_time": + percent = (v / total_time * 100) if total_time > 0 else 0 + print(f" • {k}: {v:.2f}s ({percent:.1f}%)", flush=True) - logger.log_metrics(metrics, step + 1, prefix="train") - logger.log_metrics(timing_metrics, step + 1, prefix="timing/train") + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + 
metrics["global_valid_toks"] / total_time / total_num_gpus + ) + performance_metrics = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) - timer.reset() - step += 1 - if step >= master_config["grpo"]["max_num_steps"]: - break + logger.log_metrics(metrics, total_steps + 1, prefix="train") + logger.log_metrics( + performance_metrics, total_steps + 1, prefix="performance" + ) + logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") + + # Reset the batch and set dynamic_sampling_num_gen_batches to 0 + batch_cache = None + dynamic_sampling_num_gen_batches = 0 + + timer.reset() + current_step += 1 + total_steps += 1 + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return + if total_steps >= max_num_steps: + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) + return + + current_epoch += 1 + current_step = 0 # Reset step counter for new epoch def validate( @@ -821,12 +1541,15 @@ def validate( ) -> tuple[dict[str, Any], dict[str, Any]]: """Run validation on the validation dataset.""" if val_dataloader is None: - print(" ⚠️ No validation dataloader provided, skipping validation") + assert val_dataloader is not None or master_config["dpo"]["val_period"] == 0, ( + "val_dataloader is None, so dpo.val_period must be 0" + ) + print(" ⚠️ No validation dataloader provided, skipping validation", flush=True) return {}, {} timer = Timer() with timer.time("total_validation_time"): - print(f"▶ Starting validation at step {step}...") + print(f"▶ Starting validation at step {step}...", flush=True) total_rewards = [] total_lengths = [] @@ -840,9 +1563,26 @@ def validate( if batch_idx >= max_batches: break + additional_metrics_to_report = dict() # Generate responses (updates the LLMMessageLogType in batch_with_msg_logs) # Use async rollouts if vLLM async engine is enabled - if _should_use_async_rollouts(master_config): + # We cascade 
penguin first since penguin also uses async rollouts. + if _should_use_penguin(master_config): + generation_config = master_config["policy"]["generation"] + penguin_rollout_result = run_async_penguin_rollout( + policy_generation=policy_generation, + input_batch=val_batch, + tokenizer=tokenizer, + task_to_env=val_task_to_env, + max_seq_len=None, + generation_config=generation_config, + max_rollout_turns=None, + greedy=False, + ) + val_batch = penguin_rollout_result.final_batch + gen_metrics = penguin_rollout_result.rollout_metrics + additional_metrics_to_report = gen_metrics + elif _should_use_async_rollouts(master_config): val_batch, gen_metrics = run_async_multi_turn_rollout( policy_generation, val_batch, @@ -862,9 +1602,8 @@ def validate( max_rollout_turns=master_config["grpo"]["max_rollout_turns"], greedy=False, ) - rewards = val_batch["total_reward"] - total_rewards.extend(rewards.tolist()) + total_rewards.extend(val_batch["total_reward"].tolist()) total_lengths.append(gen_metrics["mean_gen_tokens_per_sample"]) # Collect message logs for later display @@ -878,12 +1617,23 @@ def validate( all_message_logs.extend(to_env) # Calculate validation metrics - accuracy = sum(total_rewards) / len(total_rewards) - avg_length = sum(total_lengths) / len(total_lengths) + num_samples = len(total_rewards) + if num_samples > 0: + rewards_t = torch.tensor(total_rewards, dtype=torch.float32) + # Unscaled binary reward values range = {0.0, 1.0} + correct_response_reward = torch.tensor(1.0, dtype=torch.float32) + accuracy = (rewards_t == correct_response_reward).float().mean().item() + else: + accuracy = 0.0 + + avg_length = ( + sum(total_lengths) / len(total_lengths) if len(total_lengths) > 0 else 0.0 + ) val_metrics = { "accuracy": accuracy, "avg_length": avg_length, + **additional_metrics_to_report, } # Print sample conversations only once at the end of validation @@ -899,7 +1649,7 @@ def validate( ) except Exception as e: print(f"\n ⚠️ Error displaying message samples: 
{str(e)}") - print(" ⚠️ Continuing validation without displaying samples...") + print(" ⚠️ Continuing validation without displaying samples...", flush=True) # Get timing metrics timing_metrics = timer.get_timing_metrics(reduction_op="sum") @@ -909,14 +1659,751 @@ def validate( print("\n📊 Validation Results:") print(f" • Accuracy: {accuracy:.4f}") print(f" • Average response length: {avg_length:.1f} tokens") - print(f" • Samples processed: {len(total_rewards)}") + print(f" • Samples processed: {len(total_rewards)}", flush=True) # Print timing information print("\n ⏱️ Validation Timing:") validation_time = timing_metrics.get("total_validation_time", 0) - print(f" • Total validation time: {validation_time:.2f}s") + print(f" • Total validation time: {validation_time:.2f}s", flush=True) # Make sure to reset the timer after validation timer.reset() + # Explicit GPU memory cleanup after validation + gc.collect() + torch.cuda.empty_cache() + return val_metrics, timing_metrics + + +def async_grpo_train( + policy: ColocatablePolicyInterface, + policy_generation: Optional[GenerationInterface], + dataloader: StatefulDataLoader, + val_dataloader: Optional[StatefulDataLoader], + tokenizer: TokenizerType, + loss_fn: LossFunction, + task_to_env: dict[str, EnvironmentInterface], + val_task_to_env: Optional[dict[str, EnvironmentInterface]], + logger: Logger, + checkpointer: CheckpointManager, + grpo_save_state: GRPOSaveState, + master_config: MasterConfig, + max_trajectory_age_steps: int = 1, +) -> None: + """Run asynchronous GRPO training with replay buffer. 
+ + Args: + policy: Training policy + policy_generation: Generation interface + dataloader: Training data loader + val_dataloader: Validation data loader + tokenizer: Tokenizer + loss_fn: Loss function + task_to_env: Training environments + val_task_to_env: Validation environments + logger: Logger + checkpointer: Checkpoint manager + grpo_save_state: Training state + master_config: Master configuration + max_trajectory_age_steps: Maximum age (in training steps) for trajectories to be used in training + """ + # Ensure we are running with a compatible async generation backend + assert _should_use_async_rollouts(master_config), ( + "Async GRPO requires vLLM backend with vllm_cfg.async_engine=True. " + "Set policy.generation.vllm_cfg.async_engine to true in your config." + ) + assert master_config["loss_fn"]["use_importance_sampling_correction"] is True, ( + "Importance sampling correction must be enabled for async GRPO for good convergence due to off-policy samples!" + ) + + if master_config["grpo"]["async_grpo"]["max_trajectory_age_steps"] > 1: + if not master_config["grpo"]["async_grpo"].get( + "in_flight_weight_updates", False + ): + print( + "⚠️ WARNING: In-flight weight updates must be enabled for async GRPO with max_trajectory_age_steps > 1. " + "Without in-flight weight updates, having more max_trajectory_age_steps will not give any performance benefit." 
+ ) + + # Import async utilities only when needed + from nemo_rl.algorithms.async_utils import AsyncTrajectoryCollector, ReplayBuffer + + timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() + NEED_REFIT = True + + # Setup generation interface + if policy_generation is None: + policy_generation = policy + NEED_REFIT = False + POLICY_GENERATION_STALE = True + assert policy_generation is not None + + # Training state + step = grpo_save_state["current_step"] + weight_version = step # Tracks refitted weight versions + consumed_samples = grpo_save_state["consumed_samples"] + total_valid_tokens = grpo_save_state.get( + "total_valid_tokens", 0 + ) # Default to 0 for backward compatibility with older checkpoints + val_period = master_config["grpo"]["val_period"] + val_at_start = master_config["grpo"]["val_at_start"] + colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] + + assert not colocated_inference, ( + "Colocated inference is not supported for async GRPO. Please use non-colocated inference." 
+ ) + + # Calculate minimum buffer size from training requirements + # In per-prompt buffer mode, one buffer entry is 1 prompt * num_generations_per_prompt + num_prompts_per_step = master_config["grpo"]["num_prompts_per_step"] + samples_per_prompt_group = master_config["grpo"]["num_generations_per_prompt"] + train_gbs = master_config["policy"]["train_global_batch_size"] + + # Ensure the buffer has at least one step worth of prompt-groups before training + min_trajectories_needed = num_prompts_per_step + + print("📊 Buffer requirements calculation:") + print(f" - num_prompts_per_step: {num_prompts_per_step}") + print(f" - num_generations_per_prompt: {samples_per_prompt_group}") + print(f" - samples_per_prompt_group: {samples_per_prompt_group}") + print(f" - train_global_batch_size: {train_gbs}") + print(f" - min_trajectories_needed: {min_trajectories_needed} (async mode)") + + _replay_py_exec = get_actor_python_env( + "nemo_rl.algorithms.async_utils.ReplayBuffer" + ) + if _replay_py_exec.startswith("uv"): + # Lazily build a dedicated venv across all Ray nodes on-demand. 
+ _replay_py_exec = create_local_venv_on_each_node( + _replay_py_exec, + "nemo_rl.algorithms.async_utils.ReplayBuffer", + ) + + _replay_runtime_env = { + "py_executable": _replay_py_exec, + "env_vars": { + **os.environ, + "VIRTUAL_ENV": _replay_py_exec, + "UV_PROJECT_ENVIRONMENT": _replay_py_exec, + }, + } + + # Calculate optimal buffer size based on generation limits to prevent length bias + # Each weight version generates exactly num_prompts_per_step trajectories + # With max_age_steps, we keep trajectories from multiple weight versions + num_prompts_per_step = master_config["grpo"]["num_prompts_per_step"] + late_arrival_slack = 2 + optimal_buffer_size = ( + num_prompts_per_step * max_trajectory_age_steps * late_arrival_slack + ) + + replay_buffer = ReplayBuffer.options(runtime_env=_replay_runtime_env).remote( + max_size=optimal_buffer_size + ) + + _tc_py_exec = get_actor_python_env( + "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector" + ) + if _tc_py_exec.startswith("uv"): + _tc_py_exec = create_local_venv_on_each_node( + _tc_py_exec, + "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector", + ) + + _tc_runtime_env = { + "py_executable": _tc_py_exec, + "env_vars": { + **os.environ, + "VIRTUAL_ENV": _tc_py_exec, + "UV_PROJECT_ENVIRONMENT": _tc_py_exec, + }, + } + + # Initialize trajectory collector with synchronized collection + trajectory_collector = AsyncTrajectoryCollector.options( + runtime_env=_tc_runtime_env + ).remote( + policy_generation=policy_generation, + tokenizer=tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=replay_buffer, + start_step=step, + ) + + # Start trajectory collection in background + collection_task = trajectory_collector.start_collection.remote(dataloader) + + # Ensure collector knows initial weight version + trajectory_collector.set_weight_version.remote(weight_version) + + print("📦 Started continuous background trajectory collection") + + print( + f"🚀 Starting async GRPO training with 
buffer_size={optimal_buffer_size}, max_age={max_trajectory_age_steps} steps" + ) + + print("⏳ Preparing policy generation for training...") + if NEED_REFIT and POLICY_GENERATION_STALE: + print("🔄 Refitting policy generation with actual model weights...") + try: + refit_policy_generation(policy, policy_generation, colocated_inference) + print("✅ Policy generation refit completed successfully") + POLICY_GENERATION_STALE = False + except Exception as e: + print(f"❌ Policy generation refit failed: {e}") + import traceback + + traceback.print_exc() + return + else: + print("🔄 Preparing policy generation for inference...") + try: + policy_generation.prepare_for_generation() + print("✅ Policy generation preparation completed successfully") + except Exception as e: + print(f"❌ Policy generation preparation failed: {e}") + import traceback + + traceback.print_exc() + return + + print("✅ Policy generation setup complete, proceeding to validation...") + + # Run validation at start if configured + if val_at_start and step == 0: + print("\n🔍 Running initial validation...") + # Pause trajectory collection during initial validation + trajectory_collector.pause.remote() + + try: + val_metrics, validation_timings = validate( + policy_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=0, + master_config=master_config, + ) + policy_generation.finish_generation() + logger.log_metrics(val_metrics, step, prefix="validation") + logger.log_metrics(validation_timings, step, prefix="timing/validation") + print("✅ Initial validation completed successfully") + except Exception as e: + print(f"❌ Initial validation failed: {e}") + import traceback + + traceback.print_exc() + # Continue anyway since validation is optional + finally: + # Resume trajectory collection after initial validation + trajectory_collector.resume.remote() + + print("✅ All setup complete, starting buffer wait...") + + # Wait for initial buffer fill + print( + f"⏳ Waiting for replay buffer to have 
sufficient trajectories ({min_trajectories_needed} trajectories)..." + ) + wait_iterations = 0 + while True: + buffer_size_current = ray.get(replay_buffer.size.remote()) + + print( + f" Wait iteration {wait_iterations}: buffer_filled_ratio={buffer_size_current}/{min_trajectories_needed}" + ) + + if buffer_size_current >= min_trajectories_needed: + break + + time.sleep(1.0) + + print("✅ Buffer ready! Starting training loop...") + + # Main training loop + try: + while step < master_config["grpo"]["max_num_steps"]: + print( + f"\n{'=' * 25} Step {step + 1}/{master_config['grpo']['max_num_steps']} {'=' * 25}" + ) + maybe_gpu_profile_step(policy, step + 1) + if policy != policy_generation: + maybe_gpu_profile_step(policy_generation, step + 1) + + with timer.time("total_step_time"): + # Sample trajectories from replay buffer + print("📦 Sampling from replay buffer...") + with timer.time("exposed_generation"): + buffer_size_current = ray.get(replay_buffer.size.remote()) + print( + f"📊 Step coordination: training_step={step}, max_age={max_trajectory_age_steps}, buffer_size={buffer_size_current}" + ) + + # Sample the required number of per-prompt groups. + num_prompt_groups_needed = master_config["grpo"][ + "num_prompts_per_step" + ] + sample_result = ray.get( + replay_buffer.sample.remote( + num_prompt_groups=num_prompt_groups_needed, + current_weight_version=weight_version, + max_age_steps=max_trajectory_age_steps, + ) + ) + + if ( + sample_result is None + or len(sample_result["trajectories"]) + != num_prompt_groups_needed + ): + print( + "⏳ Buffer empty or not enough groups to form a full step, waiting..." + ) + + # Get buffer debug info to help diagnose the issue + buffer_debug = ray.get(replay_buffer.get_debug_info.remote()) + buffer_size = buffer_debug["total_trajectories"] + + if buffer_size > 0: + print( + f"🔍 Debug: Buffer has {buffer_size} trajectories but sampling requires exactly {num_prompt_groups_needed}." 
+ ) + print(f" Current weight version: {weight_version}") + print(f" Max trajectory age: {max_trajectory_age_steps}") + print( + f" Trajectory versions in buffer: {buffer_debug['trajectory_versions']}" + ) + + time.sleep(0.5) + continue + + # Extract trajectories and metadata from sample result + trajectories = sample_result["trajectories"] + avg_trajectory_age = sample_result["avg_trajectory_age"] + + print( + f"✅ Sampled {len(trajectories)} trajectory groups from buffer (avg age: {avg_trajectory_age:.2f} steps)" + ) + + # Concatenate per-prompt groups into a single training batch + per_prompt_batches = [t["batch"] for t in trajectories] + repeated_batch = BatchedDataDict.from_batches(per_prompt_batches) + # Aggregate rollout metrics across groups (simple mean where applicable) + rollout_metrics = {} + for t in trajectories: + for k, v in t["rollout_metrics"].items(): + rollout_metrics.setdefault(k, []).append(v) + # TODO: this simple averaging might cause misleading information for such data as max_gen_tokens, etc. + rollout_metrics = { + k: (sum(v) / len(v) if isinstance(v[0], (int, float)) else v) + for k, v in rollout_metrics.items() + } + + # Enforce fixed training batch: num_prompts_per_step * num_generations_per_prompt + expected_batch_size = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + if repeated_batch.size != expected_batch_size: + print( + f"❌ Unexpected training batch size: got {repeated_batch.size}, expected {expected_batch_size}. Skipping step and waiting for correct buffer content." + ) + time.sleep(0.5) + continue + + # Optional sanity: ensure DP divisibility to avoid sharding issues + dp_size = policy.sharding_annotations.get_axis_size("data_parallel") + if expected_batch_size % dp_size != 0: + raise AssertionError( + f"Configuration error: (num_prompts_per_step * num_generations_per_prompt) = {expected_batch_size} must be divisible by data_parallel size {dp_size}." 
+ ) + + print(f"Got trajectory batch (size: {repeated_batch.size})") + + print("▶ Processing rewards...") + with timer.time("reward_calculation"): + prompt_only_message_logs = [] + for message_log in repeated_batch["message_log"]: + prompt_only_log = [] + for message in message_log: + if message["role"] == "user" or message["role"] == "system": + prompt_only_log.append(message) + prompt_only_message_logs.append(prompt_only_log) + + prompt_batched_flat, prompt_input_lengths = ( + batched_message_log_to_flat_message( + prompt_only_message_logs, + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + ) + ) + prompt_only_ids = prompt_batched_flat["token_ids"] + + rewards = repeated_batch["total_reward"] + + print("▶ Computing advantages...") + + baseline, std = calculate_baseline_and_std_per_prompt( + prompt_only_ids, + rewards, + torch.ones_like(rewards), + leave_one_out_baseline=master_config["grpo"][ + "use_leave_one_out_baseline" + ], + ) + advantages = (rewards - baseline).unsqueeze(-1) + + print( + f" 📊 Rewards stats: min={rewards.min():.4f}, max={rewards.max():.4f}, mean={rewards.mean():.4f}, std={rewards.std():.4f}" + ) + print( + f" 📊 Baseline stats: min={baseline.min():.4f}, max={baseline.max():.4f}, mean={baseline.mean():.4f}" + ) + print( + f" 📊 Advantages stats: min={advantages.min():.4f}, max={advantages.max():.4f}, mean={advantages.mean():.4f}, std={advantages.std():.4f}" + ) + + if master_config["grpo"]["normalize_rewards"]: + advantages = normalize_advantages_with_epsilon( + advantages=advantages, + std=std, + ) + + print( + f" 📊 Normalized advantages stats: min={advantages.min():.4f}, max={advantages.max():.4f}, mean={advantages.mean():.4f}, std={advantages.std():.4f}" + ) + + # Prepare training data (same as sync version) + with timer.time("data_processing"): + # Add loss mask and advantages to each message + for i, message_log in enumerate(repeated_batch["message_log"]): + for j, message in enumerate(message_log): + if message["role"] == 
"assistant": + message["token_loss_mask"] = torch.ones_like( + message["token_ids"] + ) + else: + message["token_loss_mask"] = torch.zeros_like( + message["token_ids"] + ) + if "generation_logprobs" not in message: + message["generation_logprobs"] = torch.zeros_like( + message["token_ids"], dtype=torch.float32 + ) + message["advantages"] = advantages[i].expand( + message["token_ids"].shape + ) + + # Convert to flat format for training + flat_messages, input_lengths = batched_message_log_to_flat_message( + repeated_batch["message_log"], + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + make_sequence_length_divisible_by=master_config["policy"][ + "make_sequence_length_divisible_by" + ], + ) + + # Create training data + train_data = BatchedDataDict[ClippedPGLossDataDict]( + { + "input_ids": flat_messages["token_ids"], + "input_lengths": input_lengths, + "advantages": flat_messages["advantages"], + "generation_logprobs": flat_messages["generation_logprobs"], + "token_mask": flat_messages["token_loss_mask"], + "sample_mask": repeated_batch["loss_multiplier"], + } + ) + train_data.to("cpu") + + # Training phase (same as sync version) + print("▶ Preparing for logprob inference...") + with timer.time("logprob_inference_prep"): + policy.prepare_for_lp_inference() + + print("▶ Computing logprobs...") + with timer.time("policy_and_reference_logprobs"): + fprop_logprobs = policy.get_logprobs(train_data)["logprobs"] + reference_logprobs = policy.get_reference_policy_logprobs( + train_data + )["reference_logprobs"] + train_data["prev_logprobs"] = fprop_logprobs + train_data["reference_policy_logprobs"] = reference_logprobs + + print("▶ Preparing for training...") + with timer.time("training_prep"): + policy.prepare_for_training() + POLICY_GENERATION_STALE = True + + print("▶ Training policy...") + with timer.time("policy_training"): + train_results = policy.train(train_data, loss_fn) + + print("🔄 Synchronizing policy weights to trajectory collector…") + if NEED_REFIT: + 
# Measure pending-generation wait as exposed_generation time + print("🔄 Coordinating with trajectory collector before refit...") + with timer.time("exposed_generation"): + ray.get(trajectory_collector.prepare_for_refit.remote()) + + # Only the actual refit/weight transfer should be counted as weight_sync + print("🔄 Performing policy generation refit...") + with timer.time("weight_sync"): + refit_policy_generation( + policy, policy_generation, colocated_inference + ) + POLICY_GENERATION_STALE = False + + # Update weight version before resuming trajectory collection so that all trajectories are updated with the new correct weight version + weight_version += 1 + trajectory_collector.set_weight_version.remote(weight_version) + trajectory_collector.resume_after_refit.remote() + + # Validation + val_metrics, validation_timings = None, None + is_last_step = step + 1 == master_config["grpo"]["max_num_steps"] + + if val_period > 0 and (step + 1) % val_period == 0: + # Pause trajectory collection during validation to reduce memory pressure + trajectory_collector.pause.remote() + + if NEED_REFIT and POLICY_GENERATION_STALE: + refit_policy_generation( + policy, policy_generation, colocated_inference + ) + POLICY_GENERATION_STALE = False + else: + policy_generation.prepare_for_generation() + val_metrics, validation_timings = validate( + policy_generation, + val_dataloader, + tokenizer, + val_task_to_env, + step=step + 1, + master_config=master_config, + ) + policy_generation.finish_generation() + logger.log_metrics( + validation_timings, step + 1, prefix="timing/validation" + ) + logger.log_metrics(val_metrics, step + 1, prefix="validation") + + # Explicit GPU memory cleanup after validation in async mode + import gc + + gc.collect() + torch.cuda.empty_cache() + + # Resume trajectory collection after validation + trajectory_collector.resume.remote() + # Get flat advantages and token mask for masked metrics computation + flat_advantages = flat_messages["advantages"] + 
flat_token_mask = flat_messages["token_loss_mask"] + + # Filter advantages using token mask (only valid response tokens) + response_advantages = torch.masked_select( + flat_advantages, flat_token_mask.bool() + ) + + metrics = { + "loss": train_results["loss"].numpy(), + "reward": rewards.numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + "mean_prompt_length": repeated_batch["length"].numpy(), + "total_num_tokens": input_lengths.numpy(), + # Add masked advantages tracking metrics (only for valid response tokens) + "advantages/mean": torch.mean(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/max": torch.max(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + "advantages/min": torch.min(response_advantages).detach().item() + if response_advantages.numel() > 0 + else 0.0, + } + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in { + "lr", + "wd", + "reward", + "global_valid_seqs", + "global_valid_toks", + "mean_prompt_length", + }: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + metrics.update(rollout_metrics) + total_valid_tokens += metrics["global_valid_toks"] + + # Checkpointing (same as sync version) + consumed_samples += master_config["grpo"]["num_prompts_per_step"] + timeout.mark_iteration() + + should_save_by_step = ( + is_last_step + or (step + 1) % master_config["checkpointing"]["save_period"] == 0 + ) + # +1 because step is 0-indexed + # Check if timeout-based checkpointing is enabled in config. 
+ should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): + policy.prepare_for_training() + + grpo_save_state["current_step"] = step + 1 + grpo_save_state["total_valid_tokens"] = total_valid_tokens + if val_metrics is not None: + grpo_save_state["val_reward"] = val_metrics["accuracy"] + elif "val_reward" in grpo_save_state: + del grpo_save_state["val_reward"] + grpo_save_state["consumed_samples"] = consumed_samples + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" or "train" metrics dictionary.' + f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_reward --> 'val:accuracy'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: + warnings.warn( + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. 
" + "This checkpoint will not be saved as top-k.", + stacklevel=2, + ) + if full_metric_name in grpo_save_state: + del grpo_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + grpo_save_state[full_metric_name] = metrics_source[ + metric_name + ] + + with timer.time("checkpointing"): + print(f"Saving checkpoint for step {step + 1}...") + checkpoint_path = checkpointer.init_tmp_checkpoint( + step + 1, grpo_save_state, master_config + ) + policy.save_checkpoint( + weights_path=os.path.join( + checkpoint_path, "policy", "weights" + ), + optimizer_path=os.path.join( + checkpoint_path, "policy", "optimizer" + ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), + checkpointing_cfg=master_config["checkpointing"], + ) + # Get dataloader state from trajectory collector + actual_dataloader_state = ray.get( + trajectory_collector.get_dataloader_state.remote() + ) + torch.save( + actual_dataloader_state, + os.path.join(checkpoint_path, "train_dataloader.pt"), + ) + checkpointer.finalize_checkpoint(checkpoint_path) + policy.offload_after_refit() + + log_data = {"content": flat_messages["content"]} + log_data["rewards"] = rewards.tolist() + log_data["generation_logprobs"] = train_data["generation_logprobs"].tolist() + log_data["prev_logprobs"] = train_data["prev_logprobs"].tolist() + log_data["input_lengths"] = input_lengths.tolist() + logger.log_batched_dict_as_jsonl( + log_data, f"train_data_step{step + 1}.jsonl" + ) + + timing_metrics: dict[str, float] = timer.get_timing_metrics( + reduction_op="sum" + ) + + # Add buffer stats + buffer_size_current = ray.get(replay_buffer.size.remote()) + metrics["buffer_size"] = buffer_size_current + metrics["avg_trajectory_age"] = avg_trajectory_age + + print("\n📊 Training Results:") + print(f" • Loss: {metrics['loss']:.4f}") + print(f" • Generation KL Error: {metrics['gen_kl_error']:.4f}") + print(f" • Avg 
Reward: {np.mean(rewards.numpy()):.4f}") + print(f" • Buffer Size: {buffer_size_current}") + print(f" • Avg Trajectory Age: {avg_trajectory_age:.2f} steps") + + print("\n⏱️ Timing:") + total_time = timing_metrics.get("total_step_time", 0) + print(f" • Total step time: {total_time:.2f}s") + for k, v in sorted( + timing_metrics.items(), key=lambda item: item[1], reverse=True + ): + if k != "total_step_time": + percent = (v / total_time * 100) if total_time > 0 else 0 + print(f" • {k}: {v:.2f}s ({percent:.1f}%)") + + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + metrics["global_valid_toks"] / total_time / total_num_gpus + ) + performance_metrics = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) + + logger.log_metrics(performance_metrics, step + 1, prefix="performance") + logger.log_metrics(metrics, step + 1, prefix="train") + logger.log_metrics(timing_metrics, step + 1, prefix="timing/train") + + timer.reset() + step += 1 + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return + if step >= master_config["grpo"]["max_num_steps"]: + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) + return + + except Exception as e: + print(f"❌ Error in async loop: {e}") + import traceback + + traceback.print_exc() + + finally: + # Clean up + print("🛑 Stopping trajectory collection...") + try: + ray.kill(trajectory_collector) + except Exception as e: + print(f"Error stopping trajectory collector: {e}") + + try: + ray.kill(replay_buffer) + except Exception as e: + print(f"Error stopping replay buffer: {e}") + + print("Async GRPO training complete!") diff --git a/nemo_rl/algorithms/loss_functions.py b/nemo_rl/algorithms/loss_functions.py old mode 100644 new mode 100755 index 923e836554..5ad8b460d0 --- 
a/nemo_rl/algorithms/loss_functions.py +++ b/nemo_rl/algorithms/loss_functions.py @@ -11,18 +11,21 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import Any, Optional, TypedDict, TypeVar +from typing import Any, NotRequired, Optional, TypedDict, TypeVar import torch +import torch.distributed from nemo_rl.algorithms.interfaces import LossFunction, LossType -from nemo_rl.algorithms.utils import ( - calculate_kl_penalty_joschu2020, - masked_mean, -) +from nemo_rl.algorithms.utils import calculate_kl, masked_mean from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.model_utils import from_parallel_logits_to_logprobs -from nemo_rl.models.dtensor.parallelize import ( +from nemo_rl.distributed.model_utils import ( + ChunkedDistributedEntropy, + ChunkedDistributedGatherLogprob, + _get_tokens_on_this_cp_rank, + allgather_cp_sharded_tensor, + from_parallel_logits_to_logprobs, + gather_logits_at_global_indices, get_logprobs_from_vocab_parallel_logits, ) @@ -31,12 +34,22 @@ class ClippedPGLossConfig(TypedDict): reference_policy_kl_penalty: float + reference_policy_kl_type: str + kl_input_clamp_value: float | None + kl_output_clamp_value: float | None ratio_clip_min: float ratio_clip_max: float - ratio_clip_c: float + # Dual-clipping value (should be >1 if enabled; usually set to 3 empirically). None to disable. + ratio_clip_c: float | None use_on_policy_kl_approximation: bool use_importance_sampling_correction: bool + truncated_importance_sampling_ratio: float | None token_level_loss: bool + # If True, apply the off-policy importance-sampling correction at the + # sequence level (one weight per generated sample), as in GSPO. + # If False (default), correction is applied at the token level as in the + # original GRPO paper. 
+ sequence_level_importance_ratios: NotRequired[bool] class ClippedPGLossDataDict(TypedDict): @@ -60,6 +73,7 @@ class ClippedPGLossFn(LossFunction): - PPO (Clipped) - https://arxiv.org/abs/1707.06347 - GRPO - https://arxiv.org/abs/2402.03300 - REINFORCE/RLOO (set disable_ppo_ratio = True and ignores ratio_clip_min/ratio_clip_max) - https://arxiv.org/abs/2402.14740 + - GSPO (set sequence_level_importance_ratios = True and token_level_loss = False) - https://arxiv.org/abs/2507.18071 Formula: L(θ) = E_t [ min(r_t(θ) * A_t, clip(r_t(θ), 1-ε, 1+ε) * A_t) ] - β * KL(π_θ || π_ref) @@ -96,15 +110,36 @@ def __init__(self, cfg: ClippedPGLossConfig): self.ratio_clip_max = cfg["ratio_clip_max"] self.ratio_clip_c = cfg["ratio_clip_c"] # set to None to disable dual-clipping self.reference_policy_kl_penalty = cfg["reference_policy_kl_penalty"] + self.reference_policy_kl_type = cfg["reference_policy_kl_type"] + self.kl_input_clamp_value = cfg["kl_input_clamp_value"] + self.kl_output_clamp_value = cfg["kl_output_clamp_value"] self.disable_ppo_ratio = cfg.get("disable_ppo_ratio", False) self.use_on_policy_kl_approximation = cfg["use_on_policy_kl_approximation"] self.use_importance_sampling_correction = cfg[ "use_importance_sampling_correction" ] - + self.truncated_importance_sampling_ratio = cfg[ + "truncated_importance_sampling_ratio" + ] + # Whether to compute importance weights per-sequence instead of per-token. + self.sequence_level_importance_ratios = cfg.get( + "sequence_level_importance_ratios", + False, + ) self.loss_type = ( LossType.TOKEN_LEVEL if cfg["token_level_loss"] else LossType.SEQUENCE_LEVEL ) + if self.sequence_level_importance_ratios: + assert self.loss_type == LossType.SEQUENCE_LEVEL, ( + "sequence-level importance sampling (e.g. 
GSPO) is mutually exclusive with token-level loss" + ) + if self.truncated_importance_sampling_ratio is not None: + assert self.use_importance_sampling_correction, ( + "truncated_importance_sampling_ratio is only supported when use_importance_sampling_correction is True" + ) + assert self.truncated_importance_sampling_ratio > 0, ( + "truncated_importance_sampling_ratio should be positive" + ) def __call__( self, @@ -137,6 +172,60 @@ def __call__( global_normalization_factor=global_valid_toks, ).item() + # gen-kl: kl(P_gen || P_train) + # where log_ratio = prev_logprobs - generation_logprobs + gen_kl_error = calculate_kl( + logprobs=generation_logprobs, + logprobs_reference=prev_logprobs, + kl_type=self.reference_policy_kl_type, + input_clamp_value=None, + output_clamp_value=None, + ) + gen_kl_error = masked_mean( + gen_kl_error, + mask, + global_normalization_factor=global_valid_toks, + ).item() + + # policy-kl: kl(P_train || P_gen) + # where log_ratio = generation_logprobs - prev_logprobs + policy_kl_error = calculate_kl( + logprobs=prev_logprobs, + logprobs_reference=generation_logprobs, + kl_type=self.reference_policy_kl_type, + input_clamp_value=None, + output_clamp_value=None, + ) + policy_kl_error = masked_mean( + policy_kl_error, + mask, + global_normalization_factor=global_valid_toks, + ).item() + + # Jensen-Shannon divergence + # M = 0.5 * (P_train + P_gen) + # JSD = 0.5 * KL(P_train || M) + 0.5 * KL(P_gen || M) + log_mixture = torch.log( + 0.5 * torch.exp(prev_logprobs) + 0.5 * torch.exp(generation_logprobs) + ) + # KL(P_train || M) + kl_prev_to_mixture = ( + torch.exp(prev_logprobs - log_mixture) - (prev_logprobs - log_mixture) - 1 + ) + + # KL(P_gen || M) + kl_gen_to_mixture = ( + torch.exp(generation_logprobs - log_mixture) + - (generation_logprobs - log_mixture) + - 1 + ) + + js_divergence_error = masked_mean( + 0.5 * kl_prev_to_mixture + 0.5 * kl_gen_to_mixture, + mask, + global_normalization_factor=global_valid_toks, + ).item() + next_token_logits = 
next_token_logits.to(torch.float32) if vocab_parallel_group is not None: @@ -185,9 +274,12 @@ def __call__( kl = ( kl_importance_weights * self.reference_policy_kl_penalty - * calculate_kl_penalty_joschu2020( - logprobs_policy=curr_logprobs, + * calculate_kl( + logprobs=curr_logprobs, logprobs_reference=reference_policy_logprobs, + kl_type=self.reference_policy_kl_type, + input_clamp_value=self.kl_input_clamp_value, + output_clamp_value=self.kl_output_clamp_value, ) ) if self.loss_type == LossType.TOKEN_LEVEL: @@ -205,7 +297,17 @@ def __call__( # Calculate clipped loss function if ppo ratio is enabled. if not self.disable_ppo_ratio: - ratios = (curr_logprobs - prev_logprobs).exp() + log_ratios = curr_logprobs - prev_logprobs + if self.sequence_level_importance_ratios: + seq_log_ratio_mean = masked_mean( + log_ratios, + token_mask, + dim=-1, + ).unsqueeze(-1) + seq_ratio = seq_log_ratio_mean.exp() + ratios = seq_ratio.repeat(1, advantages.shape[1]) + else: + ratios = log_ratios.exp() ratios_clamped = ratios.clamp( 1.0 - self.ratio_clip_min, 1.0 + self.ratio_clip_max ) @@ -229,11 +331,35 @@ def __call__( advantages < 0, torch.min(clip_loss, loss3), clip_loss ) + # ------------------------------------------------------------- + # Off-policy (actor) importance-sampling correction + # ------------------------------------------------------------- # See: docs/guides/grpo.md#importance-sampling-correction - actor_importance_weights = torch.exp(prev_logprobs - generation_logprobs) - actor_importance_weights = torch.nan_to_num( - actor_importance_weights, nan=0.0, posinf=0.0, neginf=0.0 - ) + if self.sequence_level_importance_ratios: + # importance weight w_i = exp(Σ_t (log π_actor − log π_behaviour)) + seq_lp_diff = ((prev_logprobs - generation_logprobs) * mask).sum(dim=-1) + actor_importance_weights = torch.exp(seq_lp_diff).detach() + actor_importance_weights = torch.nan_to_num( + actor_importance_weights, nan=0.0, posinf=0.0, neginf=0.0 + ) + # Broadcast to token 
dimension so we can reuse existing reduction + actor_importance_weights_expanded = actor_importance_weights.unsqueeze(-1) + else: + # Token-level correction + actor_importance_weights_expanded = torch.exp( + prev_logprobs - generation_logprobs + ) + actor_importance_weights_expanded = torch.nan_to_num( + actor_importance_weights_expanded, nan=0.0, posinf=0.0, neginf=0.0 + ) + # TIS see https://fengyao.notion.site/off-policy-rl + if self.truncated_importance_sampling_ratio is not None: + actor_importance_weights_expanded = torch.clamp( + actor_importance_weights_expanded, + max=self.truncated_importance_sampling_ratio, + ) + actor_importance_weights = actor_importance_weights_expanded + del actor_importance_weights_expanded if self.use_importance_sampling_correction: importance_weights_to_use = actor_importance_weights else: @@ -256,12 +382,20 @@ def __call__( global_normalization_factor=global_valid_seqs, ) + # Metric: sampling importance ratio (mean over samples) # See: docs/guides/grpo.md#sampling-importance-ratio - sample_importance_ratio = masked_mean( - actor_importance_weights, - mask, - global_normalization_factor=global_valid_toks, - ) + if self.sequence_level_importance_ratios: + sample_importance_ratio = masked_mean( + actor_importance_weights, + sample_mask, + global_normalization_factor=global_valid_seqs, + ) + else: + sample_importance_ratio = masked_mean( + actor_importance_weights, + mask, + global_normalization_factor=global_valid_toks, + ) # Approximating entropy as E_{s ~ \pi_{gen}(s)}[-(\pi_{curr}/\pi_{gen})log(\pi_{curr}(s))] # See more details and other metrics in docs/guides/grpo.md#metrics @@ -296,6 +430,9 @@ def __call__( "probs_ratio_clamped": probs_ratio_clamped, "kl_penalty": kl.item() / self.reference_policy_kl_penalty if kl else 0, "token_mult_prob_error": mult_prob_error, + "gen_kl_error": gen_kl_error, + "policy_kl_error": policy_kl_error, + "js_divergence_error": js_divergence_error, "sampling_importance_ratio": 
sample_importance_ratio.item(), "num_valid_samples": sample_mask.sum().item(), "approx_entropy": seq_entropy_approx.item(), @@ -325,6 +462,7 @@ def __call__( token_mask = data["token_mask"][:, 1:] sample_mask = data["sample_mask"] mask = token_mask * sample_mask.unsqueeze(-1) + seq_index = data.get("seq_index", None) next_token_logits = next_token_logits.to(torch.float32) @@ -346,7 +484,7 @@ def __call__( token_logprobs = token_logprobs[:, : data["input_ids"].shape[1] - 1] elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): token_logprobs = get_logprobs_from_vocab_parallel_logits( - next_token_logits, data["input_ids"] + next_token_logits, data["input_ids"], seq_index=seq_index ) else: next_tokens = data["input_ids"][:, 1:].cuda() # Skip first token @@ -381,6 +519,110 @@ def __call__( } +class PreferenceLossDataDict(TypedDict): + """Required keys for the preference loss function.""" + + input_ids: torch.Tensor + token_mask: torch.Tensor + sample_mask: torch.Tensor + + +class PreferenceLoss(LossFunction): + """Preference Loss function. 
+ + Optimizes the model to prefer chosen responses over rejected ones + + The preference loss is computed as: + L_pref(θ) = -E[log(σ(β * (r_chosen - r_rejected)))] + + where: + - σ is the sigmoid function + - β is a scaling factor (ex: `reference_policy_kl_penalty` in DPO) + - r_chosen and r_rejected are the rewards for chosen and rejected responses + + Returns: + tuple[torch.Tensor, dict]: A tuple containing: + - The preference loss value + - A dictionary with metrics including: + - loss: Preference loss + - accuracy: Fraction of examples where chosen response has higher reward + """ + + def __init__(self): + self.loss_type = LossType.SEQUENCE_LEVEL + + def split_output_tensor(self, tensor: Tensor) -> tuple[Tensor, Tensor]: + # tensor is of shape (2*micro_batch_size,) + return tensor[::2], tensor[1::2] + + def _preference_loss( + self, + rewards: Tensor, + sample_mask: Tensor, + global_valid_seqs: Tensor, + beta: float = 1.0, + ) -> tuple[Tensor, Tensor, Tensor, Tensor]: + rewards_chosen, rewards_rejected = self.split_output_tensor(rewards) + rewards_delta = rewards_chosen - rewards_rejected + + per_sample_loss = ( + -torch.nn.functional.logsigmoid(beta * rewards_delta) * sample_mask[::2] + ) ## zero out invalid samples + + ## divide by 2 because each preference example corresponds to 2 samples (chosen, rejected) + return ( + masked_mean( + per_sample_loss, + sample_mask[::2], + global_normalization_factor=global_valid_seqs / 2, + ), + masked_mean( + rewards_chosen > rewards_rejected, + sample_mask[::2], + global_normalization_factor=global_valid_seqs / 2, + ), + masked_mean( + rewards_chosen, + sample_mask[::2], + global_normalization_factor=global_valid_seqs / 2, + ), + masked_mean( + rewards_rejected, + sample_mask[1::2], + global_normalization_factor=global_valid_seqs / 2, + ), + ) + + def __call__( + self, + rewards: Tensor, + data: BatchedDataDict[PreferenceLossDataDict], + global_valid_seqs: Tensor, + global_valid_toks: Tensor | None, + ) -> 
tuple[torch.Tensor, dict[str, Any]]: + sample_mask = data["sample_mask"] + + rewards = rewards.squeeze(-1) + + ( + preference_loss, + accuracy, + rewards_chosen_mean, + rewards_rejected_mean, + ) = self._preference_loss(rewards, sample_mask, global_valid_seqs) + + ## divide by 2 because we're summing over (chosen, rejected) pairs + num_valid_samples = sample_mask.sum() / 2 + + return preference_loss, { + "loss": preference_loss.item(), + "accuracy": accuracy.item(), + "rewards_chosen_mean": rewards_chosen_mean.item(), + "rewards_rejected_mean": rewards_rejected_mean.item(), + "num_valid_samples": num_valid_samples.item(), + } + + class DPOLossConfig(TypedDict): reference_policy_kl_penalty: float preference_loss_weight: float @@ -398,7 +640,7 @@ class DPOLossDataDict(TypedDict): sample_mask: torch.Tensor -class DPOLossFn(LossFunction): +class DPOLossFn(PreferenceLoss): """Direct Preference Optimization (DPO) loss function. This loss function implements the DPO algorithm as described in: @@ -464,10 +706,7 @@ def __init__(self, cfg: DPOLossConfig): self.loss_type = LossType.SEQUENCE_LEVEL - def split_output_tensor(self, tensor: Tensor) -> tuple[Tensor, Tensor]: - return tensor[::2], tensor[1::2] - - def _preference_loss( + def _dpo_loss( self, next_token_logits: Tensor, data: BatchedDataDict[DPOLossDataDict], @@ -479,6 +718,7 @@ def _preference_loss( ## TODO(@ashors): there's some duplicate code here with the NLLLoss function. 
We should refactor token_mask = data["token_mask"][:, 1:] sample_mask = data["sample_mask"] + seq_index = data.get("seq_index", None) next_token_logits = next_token_logits.to(torch.float32) if vocab_parallel_group is not None: @@ -498,7 +738,7 @@ def _preference_loss( token_logprobs = token_logprobs[:, : data["input_ids"].shape[1] - 1] elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): token_logprobs = get_logprobs_from_vocab_parallel_logits( - next_token_logits, data["input_ids"] + next_token_logits, data["input_ids"], seq_index=seq_index ) else: next_tokens = data["input_ids"][:, 1:].cuda() # Skip first token @@ -518,41 +758,12 @@ def _preference_loss( if self.preference_average_log_probs: rewards = rewards / token_mask.sum(-1).clamp(min=1) - rewards_chosen, rewards_rejected = self.split_output_tensor(rewards) - rewards_delta = rewards_chosen - rewards_rejected - - per_sample_loss = ( - -torch.nn.functional.logsigmoid( - self.reference_policy_kl_penalty * rewards_delta - ) - * sample_mask[::2] - ) ## zero out invalid samples - - ## divide by 2 because each preference example corresponds to 2 samples (chosen, rejected) - return ( - masked_mean( - per_sample_loss, - sample_mask[::2], - global_normalization_factor=global_valid_seqs / 2, - ), - masked_mean( - rewards_chosen > rewards_rejected, - sample_mask[::2], - global_normalization_factor=global_valid_seqs / 2, - ), - masked_mean( - rewards_chosen, - sample_mask[::2], - global_normalization_factor=global_valid_seqs / 2, - ), - masked_mean( - rewards_rejected, - sample_mask[1::2], - global_normalization_factor=global_valid_seqs / 2, - ), + return self._preference_loss( + rewards, sample_mask, global_valid_seqs, self.reference_policy_kl_penalty ) - def __call__( + # TODO a cleaner typing fix would be required (probably that DPOLossFn should not inherit from PreferenceLoss) + def __call__( # type: ignore self, next_token_logits: Tensor, data: BatchedDataDict[DPOLossDataDict], @@ -590,7 +801,7 @@ 
def __call__( accuracy, rewards_chosen_mean, rewards_rejected_mean, - ) = self._preference_loss( + ) = self._dpo_loss( next_token_logits, data, global_valid_seqs, @@ -696,3 +907,252 @@ def __call__( metrics_accum[k] += v return loss_accum, metrics_accum + + +class DistillationLossConfig(TypedDict): + kl_type: str + mixed_kl_weight: float + zero_outside_topk: bool + + +class DistillationLossDataDict(TypedDict): + input_ids: torch.Tensor + input_lengths: torch.Tensor + token_mask: torch.Tensor + sample_mask: torch.Tensor + teacher_topk_logits: torch.Tensor + teacher_topk_indices: torch.Tensor + + +class DistillationLossFn(LossFunction): + """Distillation loss function.""" + + def __init__(self, cfg: DistillationLossConfig): + self.kl_type = cfg["kl_type"] + self.mixed_kl_weight = cfg["mixed_kl_weight"] + self.zero_outside_topk = cfg["zero_outside_topk"] + self.log_infinitesimal = -100 + self.loss_type = LossType.TOKEN_LEVEL + + assert self.kl_type in ["forward", "reverse", "mixed"], "Invalid KL type" + assert self.mixed_kl_weight >= 0 and self.mixed_kl_weight <= 1, ( + "Invalid mixed KL weight" + ) + + def __call__( + self, + next_token_logits: torch.Tensor, + data: DistillationLossDataDict, + global_valid_seqs: torch.Tensor, + global_valid_toks: torch.Tensor, + vocab_parallel_rank: Optional[int] = None, + vocab_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + context_parallel_group: Optional[torch.distributed.ProcessGroup] = None, + ) -> tuple[torch.Tensor, dict[str, Any]]: + """Compute distillation loss between teacher and student logits.""" + # Basic shapes + input_ids = data["input_ids"] + batch_size = input_ids.shape[0] + + # CP support: get CP group and size + cp_group = context_parallel_group + cp_size = 1 if cp_group is None else torch.distributed.get_world_size(cp_group) + + # Ensure float32 for stability (match other losses) + next_token_logits = next_token_logits.to(torch.float32) + per_token_kl = None + # Preferred truncated-KL path: 
teacher provides top-k support per position + teacher_topk_logits = data["teacher_topk_logits"] # [B, S, k] + teacher_topk_indices = data["teacher_topk_indices"] # [B, S, k] + + if teacher_topk_indices.shape[-1] <= 0: + raise ValueError( + f"topk must be positive, got {teacher_topk_indices.shape[-1]}. " + "topk=0 is not supported as it would result in empty tensor operations." + ) + + # Determine processing path and setup variables + if vocab_parallel_group is not None: + assert vocab_parallel_rank is not None, ( + "vocab_parallel_rank must be provided when vocab_parallel_group is provided" + ) + V_local = int(next_token_logits.shape[-1]) + vocab_start_index = vocab_parallel_rank * V_local + vocab_end_index = (vocab_parallel_rank + 1) * V_local + parallel_group = vocab_parallel_group + logits_tensor = next_token_logits + elif isinstance(next_token_logits, torch.distributed.tensor.DTensor): + device_mesh = next_token_logits.device_mesh + tp_group = device_mesh.get_group("tp") + tp_rank = tp_group.rank() + local_student_logits = next_token_logits.to_local() + V_local = int(local_student_logits.shape[-1]) + vocab_start_index = tp_rank * V_local + vocab_end_index = (tp_rank + 1) * V_local + parallel_group = tp_group + logits_tensor = local_student_logits + teacher_topk_indices = teacher_topk_indices.to(local_student_logits.device) + # For DTensor, derive CP group/size from the device mesh to ensure CP-aware alignment + if ( + device_mesh.mesh_dim_names is not None + and "cp" in device_mesh.mesh_dim_names + ): + cp_group = device_mesh.get_group("cp") + cp_size = cp_group.size() + else: + cp_group = None + cp_size = 1 + else: + parallel_group = None + logits_tensor = next_token_logits + + # Process based on zero_outside_topk setting + if self.zero_outside_topk and parallel_group is not None: + # Distributed processing with chunking + indices_local = teacher_topk_indices + pad_len = 0 + if cp_size > 1: + pad_len = logits_tensor.shape[1] * cp_size - indices_local.shape[1] 
+ if pad_len > 0: + indices_local = torch.nn.functional.pad( + indices_local, (0, 0, 0, pad_len), value=0 + ) + cp_rank = torch.distributed.get_rank(cp_group) + indices_local = _get_tokens_on_this_cp_rank( + indices_local, cp_rank, cp_size, seq_dim=1 + ) + + S_local = int(logits_tensor.shape[1]) + chunk_size = max(1, min(S_local, 1024)) + student_topk_logprobs = ChunkedDistributedGatherLogprob.apply( # type: ignore + logits_tensor, + indices_local, + vocab_start_index, + vocab_end_index, + chunk_size, + parallel_group, + False, + ) + + if self.kl_type != "forward": + H_all = ChunkedDistributedEntropy.apply( # type: ignore + logits_tensor, + chunk_size, + parallel_group, + False, + ) + + if cp_size > 1: + student_topk_logprobs = allgather_cp_sharded_tensor( + student_topk_logprobs, cp_group, seq_dim=1 + ) + if self.kl_type != "forward": + H_all = allgather_cp_sharded_tensor(H_all, cp_group, seq_dim=1) + if pad_len > 0: + student_topk_logprobs = student_topk_logprobs[:, :-pad_len, :] + if self.kl_type != "forward": + H_all = H_all[:, :-pad_len] + elif self.zero_outside_topk: + # Non-distributed processing + student_logprobs = torch.nn.functional.log_softmax(logits_tensor, dim=-1) + student_topk_logprobs = student_logprobs.gather( + dim=-1, index=teacher_topk_indices.to(student_logprobs.device) + ) + if self.kl_type != "forward": + H_all = (student_logprobs.exp() * student_logprobs).sum(-1) + else: + # Gather logits at global indices + if (parallel_group is not None) or (cp_size > 1): + student_topk_logits = gather_logits_at_global_indices( + logits_tensor, + teacher_topk_indices, + tp_group=parallel_group, + cp_group=cp_group, + vocab_start_index=( + vocab_start_index if parallel_group is not None else 0 + ), + vocab_end_index=( + vocab_end_index + if parallel_group is not None + else int(logits_tensor.shape[-1]) + ), + ) + else: + student_topk_logits = logits_tensor.gather( + dim=-1, index=teacher_topk_indices.to(logits_tensor.device) + ) + student_topk_logprobs = 
torch.nn.functional.log_softmax( + student_topk_logits, dim=-1 + ) + + # Move teacher tensors to the same device/dtype as student_topk_logits + teacher_topk_logits = teacher_topk_logits.to( + student_topk_logprobs.device, dtype=student_topk_logprobs.dtype + ) + teacher_topk_logprobs = torch.nn.functional.log_softmax( + teacher_topk_logits, dim=-1 + ) + + # Single point of next-token alignment after TP/CP processing + teacher_topk_logprobs = teacher_topk_logprobs[:, :-1, :] + student_topk_logprobs = student_topk_logprobs[:, :-1, :] + if self.zero_outside_topk and self.kl_type != "forward": + # Align H_all with next-token prediction + H_all = H_all[:, :-1] + + student_probs = student_topk_logprobs.exp() # [B, S-1, k] + teacher_probs = teacher_topk_logprobs.exp() # [B, S-1, k] + + loss_correction_term = torch.zeros_like(student_probs[..., 0]) # [B, S-1] + if self.zero_outside_topk and self.kl_type != "forward": + H_rest = H_all - (student_probs * student_topk_logprobs).sum(-1) + P_rest = 1 - (student_probs.sum(-1)) + # The entropy and prob of the rest of the tokens [B, S-1] + loss_correction_term = H_rest - self.log_infinitesimal * P_rest # [B, S-1] + if self.kl_type == "mixed": + loss_correction_term = loss_correction_term * ( + 1.0 - self.mixed_kl_weight + ) + + if self.kl_type == "forward": + per_token_kl = teacher_probs * ( + teacher_topk_logprobs - student_topk_logprobs + ) + elif self.kl_type == "reverse": + per_token_kl = student_probs * ( + student_topk_logprobs - teacher_topk_logprobs + ) + else: + # mixed KL + kl_forward = teacher_probs * (teacher_topk_logprobs - student_topk_logprobs) + kl_reverse = student_probs * (student_topk_logprobs - teacher_topk_logprobs) + per_token_kl = ( + self.mixed_kl_weight * kl_forward + + (1.0 - self.mixed_kl_weight) * kl_reverse + ) + + per_token_kl = per_token_kl.sum(dim=-1) + loss_correction_term # [B, S-1] + + # Masking and reduction + if "token_mask" in data and "sample_mask" in data: + token_mask = data["token_mask"][:, 
1:] + sample_mask = data["sample_mask"] + # Align mask length to current per_token_kl + max_len = per_token_kl.shape[1] + token_mask = token_mask[:, :max_len] + mask = token_mask * sample_mask.unsqueeze(-1) # [B, S-1] + # align mask shape to per_token_kl + kl_loss = masked_mean( + per_token_kl, + mask, + global_normalization_factor=global_valid_toks, + ) + else: + kl_loss = per_token_kl.mean() + + metrics = { + "loss": float(kl_loss.item()) if kl_loss.ndim == 0 else kl_loss, + "num_valid_samples": int(batch_size), + } + + return kl_loss, metrics diff --git a/nemo_rl/algorithms/reward_functions.py b/nemo_rl/algorithms/reward_functions.py new file mode 100644 index 0000000000..b4f2ad4d70 --- /dev/null +++ b/nemo_rl/algorithms/reward_functions.py @@ -0,0 +1,103 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import ( + NotRequired, + TypedDict, + TypeVar, +) + +import torch + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + +Tensor = TypeVar("Tensor", bound=torch.Tensor) + + +class RewardShapingConfig(TypedDict): + """Configuration for reward function processing. + + This configuration enables custom reward shaping, currently supporting DAPO-style + penalties for responses that exceed the maximum response length threshold. + """ + + enabled: bool + + # The length of the buffer to penalize responses that exceed the maximum response length threshold. 
+ # Responses of length greater than overlong_buffer_length + max_response_length will + # receive the maximum penalty. + overlong_buffer_length: NotRequired[int] + + # The penalty for responses that exceed the maximum response length threshold. + overlong_buffer_penalty: NotRequired[float] + + # The maximum response length threshold. Responses exceeding this length will be penalized. + max_response_length: NotRequired[int] + + +def apply_reward_shaping( + batch: BatchedDataDict, cfg: RewardShapingConfig +) -> BatchedDataDict: + """Process rewards by applying penalties for responses exceeding max_response_length. Currently, this function only supports DAPO reward shaping as illustrated in the DAPO paper : https://arxiv.org/pdf/2503.14476. + + Nonetheless, it can be potentially extended to support any custom reward logic. + """ + rewards = batch["total_reward"] + if not cfg["enabled"]: + return batch + + # DAPO reward shaping requires overlong_buffer_length, overlong_buffer_penalty, and max_response_length to be set. + if ( + cfg["overlong_buffer_length"] is None + or cfg["overlong_buffer_penalty"] is None + or cfg["max_response_length"] is None + ): + raise ValueError( + "Reward function is enabled but only DAPO reward shaping is currently supported. Please ensure overlong_buffer_length, overlong_buffer_penalty, and max_response_length are properly configured." 
+ ) + + # Get the overlong_buffer_length, overlong_buffer_penalty and max_response_length + overlong_buffer_length = cfg["overlong_buffer_length"] + overlong_buffer_penalty = cfg["overlong_buffer_penalty"] + max_response_length = cfg["max_response_length"] + assert overlong_buffer_penalty >= 0, f"{overlong_buffer_penalty=} must be >=0" + # Calculate the expected response length + expected_response_length = max_response_length - overlong_buffer_length + + assert len(batch["message_log"]) == len(rewards), ( + "The number of messages in the batch must match the number of rewards" + ) + + updated_rewards = torch.zeros_like(rewards) + for i, message_log in enumerate(batch["message_log"]): + # Get the assistant response length (index 1 is the assistant response) + message_response_length = None + for message in message_log: + if message["role"] == "assistant": + message_response_length = message["token_ids"].shape[0] + break + assert message_response_length is not None, ( + "Assistant response not found during reward shaping" + ) + + # Calculate the exceed length and the corresponding reward penalty + exceed_length = message_response_length - expected_response_length + overlong_reward = min( + -exceed_length / overlong_buffer_length * overlong_buffer_penalty, 0 + ) + updated_rewards[i] = rewards[i] + overlong_reward + + # Update the rewards in the batch + batch["total_reward"] = updated_rewards + + return batch diff --git a/nemo_rl/algorithms/rm.py b/nemo_rl/algorithms/rm.py new file mode 100644 index 0000000000..ad69cf3423 --- /dev/null +++ b/nemo_rl/algorithms/rm.py @@ -0,0 +1,684 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import os +import warnings +from collections import defaultdict +from functools import partial +from pathlib import Path +from typing import Optional, TypedDict + +import numpy as np +import torch +from torchdata.stateful_dataloader import StatefulDataLoader +from transformers import AutoTokenizer + +from nemo_rl.algorithms.loss_functions import ( + PreferenceLoss, +) +from nemo_rl.algorithms.utils import maybe_pad_last_batch, set_seed +from nemo_rl.data import DataConfig +from nemo_rl.data.collate_fn import preference_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.interfaces import TaskDataSpec +from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.interfaces import PolicyInterface +from nemo_rl.models.policy.lm_policy import Policy +from nemo_rl.utils.checkpoint import CheckpointingConfig, CheckpointManager +from nemo_rl.utils.logger import Logger, LoggerConfig +from nemo_rl.utils.nsys import maybe_gpu_profile_step +from nemo_rl.utils.timer import TimeoutChecker, Timer + + +class RMSaveState(TypedDict): + epoch: int # Track current epoch + step: int # Track step within current epoch + total_steps: int # Track total number of steps across all epochs + consumed_samples: int + total_valid_tokens: int # Track total number of non-padding tokens during training + + +def _default_rm_save_state() -> RMSaveState: + return { + "epoch": 0, + "step": 0, + "total_steps": 0, + "consumed_samples": 0, + "total_valid_tokens": 0, 
+ } + + +class RMConfig(TypedDict): + max_num_steps: int + max_num_epochs: int + val_period: int + val_batches: int + val_global_batch_size: int + val_micro_batch_size: int + val_at_start: bool + seed: int + + +class MasterConfig(TypedDict): + policy: PolicyConfig + data: DataConfig + rm: RMConfig + logger: LoggerConfig + cluster: ClusterConfig + checkpointing: CheckpointingConfig + + +class RMValMetrics(TypedDict): + loss: float + accuracy: float + rewards_chosen_mean: float + rewards_rejected_mean: float + num_valid_samples: float + + +# ======================================================= +# Setup & Initialization +# ======================================================= +def setup( + master_config: MasterConfig, + tokenizer: AutoTokenizer, + train_dataset: AllTaskProcessedDataset, + val_dataset: dict[str, AllTaskProcessedDataset], +) -> tuple[ + Policy, + RayVirtualCluster, + StatefulDataLoader, + dict[str, StatefulDataLoader], + PreferenceLoss, + MasterConfig, + Logger, + TaskDataSpec, + RMSaveState, +]: + """Main entry point for running RM algorithm. 
+ + Returns: + Tuple of policy, cluster, dataloader, tokenizer, loss_fn, math_env, master_config, logger + """ + set_seed(master_config["rm"]["seed"]) + + # Extract individual configs for easier access + policy_config = master_config["policy"] + data_config = master_config["data"] + logger_config = master_config["logger"] + cluster_config = master_config["cluster"] + rm_config = master_config["rm"] + + # ========================== + # Logger + # ========================== + logger = Logger(logger_config) + logger.log_hyperparams(master_config) + + # ========================== + # Checkpointing + # ========================== + checkpointer = CheckpointManager(master_config["checkpointing"]) + last_checkpoint_path = checkpointer.get_latest_checkpoint_path() + rm_save_state: Optional[RMSaveState] = checkpointer.load_training_info( + last_checkpoint_path + ) + + # ========================== + # Data + # ========================== + train_dataloader = StatefulDataLoader( + train_dataset, + batch_size=policy_config["train_global_batch_size"], + shuffle=data_config["shuffle"], + collate_fn=partial( + preference_collate_fn, + tokenizer=tokenizer, + make_sequence_length_divisible_by=policy_config[ + "make_sequence_length_divisible_by" + ], + add_loss_mask=False, + ), + drop_last=True, + num_workers=data_config["num_workers"], + ) + + if last_checkpoint_path is not None: + dataloader_state_dict = torch.load( + os.path.join(last_checkpoint_path, "train_dataloader.pt") + ) + train_dataloader.load_state_dict(dataloader_state_dict) + + val_dataloader = { + k: StatefulDataLoader( + v, + batch_size=rm_config["val_global_batch_size"], + shuffle=False, + collate_fn=partial( + preference_collate_fn, + tokenizer=tokenizer, + make_sequence_length_divisible_by=policy_config[ + "make_sequence_length_divisible_by" + ], + add_loss_mask=False, + ), + drop_last=False, + num_workers=data_config["num_workers"], + ) + for k, v in val_dataset.items() + } + + # ========================== + # 
Cluster + # ========================== + print("\n▶ Setting up compute cluster...") + cluster = RayVirtualCluster( + name="rm_cluster", + bundle_ct_per_node_list=[cluster_config["gpus_per_node"]] + * cluster_config["num_nodes"], + use_gpus=True, + num_gpus_per_node=cluster_config["gpus_per_node"], + max_colocated_worker_groups=1, + ) + print(f" ✓ Ray cluster initialized with {cluster_config['num_nodes']} nodes") + + # ========================== + # Training + # ========================== + print("\n▶ Setting up model...") + if policy_config.get("megatron_cfg", {}).get("enabled", False): + total_train_iters = min( + rm_config["max_num_steps"], + rm_config["max_num_epochs"] * len(train_dataloader), + ) + ## NOTE: we double the train_iters because effective batch size is doubled + ## for (chosen, rejected) pairs + policy_config["megatron_cfg"]["train_iters"] = total_train_iters * 2 + if "scheduler" in policy_config["megatron_cfg"]: + for k in policy_config["megatron_cfg"]["scheduler"]: + if "iters" in k: + policy_config["megatron_cfg"]["scheduler"][k] *= 2 + + policy = Policy( + cluster=cluster, + config=policy_config, + tokenizer=tokenizer, + weights_path=Path(last_checkpoint_path) / "policy" / "weights" + if last_checkpoint_path + else None, + optimizer_path=Path(last_checkpoint_path) / "policy" / "optimizer" + if last_checkpoint_path + else None, + init_optimizer=True, + init_reference_model=False, + ) + # print the node IP and GPU ID of the policy workers for debugging + policy.print_node_ip_and_gpu_id() + + loss_fn = PreferenceLoss() + print(" ✓ Model initialized") + + print("\n" + "=" * 60) + print(" " * 18 + "SETUP COMPLETE") + print("=" * 60 + "\n") + + return ( + policy, + cluster, + train_dataloader, + val_dataloader, + loss_fn, + logger, + checkpointer, + rm_save_state, + master_config, + ) + + +# ======================================================= +# Training & Validation +# ======================================================= +def validate( + 
policy: PolicyInterface, + val_dataloader: dict[str, StatefulDataLoader], + tokenizer, + loss_fn, + step: int, + master_config: MasterConfig, + val_batches: int, + val_batch_size: int, + val_mbs: int, + logger: Logger, +): + val_metrics, validation_timings = {}, {} + for val_dataset_name, v in val_dataloader.items(): + k_val_metrics, k_validation_timings = validate_one_dataset( + policy=policy, + val_dataloader=v, + loss_fn=loss_fn, + step=step, + master_config=master_config, + val_batches=val_batches, + val_batch_size=val_batch_size, + val_mbs=val_mbs, + dataset_name=val_dataset_name, + ) + prefix = f"validation-{val_dataset_name}" + + logger.log_metrics(k_val_metrics, step, prefix=prefix) + logger.log_metrics(k_validation_timings, step, prefix=f"timing/{prefix}") + + for metric_name in RMValMetrics.__annotations__.keys(): + if metric_name != "num_valid_samples": + val_metrics[f"{prefix}_{metric_name}"] = k_val_metrics[metric_name] + validation_timings[prefix + "_total_validation_time"] = k_validation_timings[ + "total_validation_time" + ] + + if len(validation_timings) > 0: + total_validation_time = sum(validation_timings.values()) + logger.log_metrics( + {"total_validation_time": total_validation_time}, + step, + prefix="timing/validation", + ) + validation_timings["total_validation_time"] = total_validation_time + + return val_metrics, validation_timings + + +def validate_one_dataset( + policy: PolicyInterface, + val_dataloader: StatefulDataLoader, + loss_fn, + step: int, + master_config: MasterConfig, + val_batches: int, + val_batch_size: int, + val_mbs: int, + dataset_name: str, +): + """Run validation on one validation dataset.""" + if val_dataloader is None: + assert val_dataloader is not None or master_config["dpo"]["val_period"] == 0, ( + "val_dataloader is None, so dpo.val_period must be 0" + ) + print(" ⚠️ No validation dataloader provided, skipping validation") + return + + timer = Timer() + + with timer.time("total_validation_time"): + print(f"▶ 
Starting validation at step {step} for `{dataset_name}` set..") + + # Show a progress indicator for validation + # val_total = len(val_dataloader) + + dict_val_metrics = defaultdict(list) + num_valid_batches = 0 + for batch_idx, val_batch in enumerate(val_dataloader): + # When running validation with drop_last=False, we might end up with a partial batch. + # In this case, we pad the batch to the next multiple of micro_batch_size * dp_size. + if val_batch.size < val_batch_size * 2: + dp_size = policy.sharding_annotations.get_axis_size("data_parallel") + val_batch = maybe_pad_last_batch(val_batch, dp_size, val_mbs * 2) + + ## just run model fwd + val_results = policy.train( + val_batch, + loss_fn, + eval_mode=True, + gbs=val_batch.size, + # NOTE: we double the batch size because each preference example corresponds to a pair of + # examples, chosen and rejected, and the pair needs to be processed as part of the same microbatch. + mbs=val_mbs * 2, + ) + + if len(val_results["all_mb_metrics"]) == 0: + warnings.warn( + "No validation metrics were collected for this batch." + " This is likely because there were no valid samples." + ) + else: + for metric_name in RMValMetrics.__annotations__.keys(): + dict_val_metrics[metric_name] += [ + sum(val_results["all_mb_metrics"][metric_name]) + ] + + num_valid_batches += 1 + + if val_batches > 0 and batch_idx >= val_batches - 1: + break + + if num_valid_batches > 0: + sum_num_valid_samples = sum(dict_val_metrics["num_valid_samples"]) + val_metrics = RMValMetrics( + num_valid_samples=sum_num_valid_samples, + **{ + metric_name: sum( + [ + value * weight + for value, weight in zip( + dict_val_metrics[metric_name], + dict_val_metrics["num_valid_samples"], + ) + ] + ) + / sum_num_valid_samples + for metric_name in RMValMetrics.__annotations__.keys() + if metric_name != "num_valid_samples" + }, + ) + else: + warnings.warn( + "No validation metrics were collected." 
+ " This is likely because there were no valid samples in the validation set." + ) + val_metrics = RMValMetrics( + **{ + metric_name: 0.0 + for metric_name in RMValMetrics.__annotations__.keys() + } + ) + + # Calculate validation metrics + policy.prepare_for_training() + + # Get timing metrics + timing_metrics = timer.get_timing_metrics(reduction_op="sum") + validation_time = timing_metrics.get("total_validation_time", 0) + + if num_valid_batches > 0: + # Print summary of validation results + print(f"\n📊 Validation Results for `{dataset_name}` set:") + for metric_name in RMValMetrics.__annotations__.keys(): + if metric_name != "num_valid_samples": + print(f" • Validation {metric_name}: {val_metrics[metric_name]:.4f}") + else: + print( + f" • Validation num valid samples: {val_metrics['num_valid_samples']:.0f}" + ) + + # Print timing information + print(f"\n ⏱️ Validation Timing for `{dataset_name}` set:") + validation_time = timing_metrics.get("total_validation_time", 0) + print(f" • Total validation time: {validation_time:.2f}s") + + # Make sure to reset the timer after validation + timer.reset() + + return val_metrics, timing_metrics + + +def rm_train( + policy, + train_dataloader, + val_dataloader, + tokenizer, + loss_fn, + master_config, + logger, + rm_task_spec, + checkpointer, + rm_save_state, +): + # Run basic rm training + timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() + if rm_save_state is None: + rm_save_state = _default_rm_save_state() + current_epoch = 0 + current_step = 0 + total_steps = 0 + total_valid_tokens = 0 + else: + current_epoch = rm_save_state["epoch"] + current_step = rm_save_state["step"] + total_steps = rm_save_state["total_steps"] + total_valid_tokens = rm_save_state.get( + "total_valid_tokens", 0 + ) # Default to 0 for backward compatibility with older checkpoints + + rm_config = master_config["rm"] + # Validation 
configuration + val_period = rm_config["val_period"] + val_at_start = rm_config["val_at_start"] + max_num_epochs = rm_config["max_num_epochs"] + + # Run validation at the start if configured + if val_at_start and total_steps == 0: + print("\n🔍 Running initial validation...") + val_metrics, validation_timings = validate( + policy, + val_dataloader, + tokenizer, + loss_fn, + step=0, + master_config=master_config, + val_batches=rm_config["val_batches"], + val_batch_size=rm_config["val_global_batch_size"], + val_mbs=rm_config["val_micro_batch_size"], + logger=logger, + ) + + policy.prepare_for_training() + + while current_epoch < max_num_epochs and ( + master_config["rm"]["max_num_steps"] == -1 + or total_steps < master_config["rm"]["max_num_steps"] + ): + print(f"\n{'=' * 25} Epoch {current_epoch + 1}/{max_num_epochs} {'=' * 25}") + + for batch in train_dataloader: + print( + f"\n{'=' * 25} Step {current_step + 1}/{min(len(train_dataloader), master_config['rm']['max_num_steps'] if master_config['rm']['max_num_steps'] != -1 else len(train_dataloader))} {'=' * 25}" + ) + maybe_gpu_profile_step(policy, total_steps + 1) + val_metrics, validation_timings = None, None + + with timer.time("total_step_time"): + # Prepare batch and generate responses + print("▶ Taking a training step...") + + train_results = policy.train( + batch, + loss_fn, + eval_mode=False, + ## NOTE: we double the batch size here because each preference example corresponds to a pair of + ## examples, chosen and rejected, and the pair needs to be processed as part of the same microbatch. 
+ gbs=master_config["policy"]["train_global_batch_size"] * 2, + mbs=master_config["policy"]["train_micro_batch_size"] * 2, + ) + + is_last_step = ( + master_config["rm"]["max_num_steps"] != -1 + and total_steps + 1 >= master_config["rm"]["max_num_steps"] + ) or ( + current_epoch + 1 == max_num_epochs + and current_step + 1 == len(train_dataloader) + ) + + # Run validation if it's a validation step + if val_period > 0 and (total_steps + 1) % val_period == 0: + val_metrics, validation_timings = validate( + policy, + val_dataloader, + tokenizer, + loss_fn, + step=total_steps + 1, + master_config=master_config, + val_batches=rm_config["val_batches"], + val_batch_size=rm_config["val_global_batch_size"], + val_mbs=rm_config["val_micro_batch_size"], + logger=logger, + ) + metrics = { + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + } + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + total_valid_tokens += metrics["global_valid_toks"] + + ## Checkpointing + timeout.mark_iteration() + + rm_save_state["consumed_samples"] += master_config["policy"][ + "train_global_batch_size" + ] + + should_save_by_step = ( + is_last_step + or (total_steps + 1) % master_config["checkpointing"]["save_period"] + == 0 + ) + should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout + ): + ## +1 because step is 0-indexed + rm_save_state["step"] = (current_step + 1) % len(train_dataloader) + rm_save_state["total_steps"] = total_steps + 1 + rm_save_state["epoch"] = current_epoch + rm_save_state["total_valid_tokens"] = total_valid_tokens + # Remove outdated validation metrics + for key in list(rm_save_state): + if ( + key.startswith("val") + and any( + [ + key.endswith(f"_{metric_name}") + for 
metric_name in RMValMetrics.__annotations__.keys() + if metric_name != "num_valid_samples" + ] + ) + and (val_metrics is None or key not in val_metrics) + ): + del rm_save_state[key] + if val_metrics is not None: + rm_save_state.update(val_metrics) + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" or "train" metrics dictionary.' + f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_loss --> 'val:validation-default_loss'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: + warnings.warn( + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. 
" + "This checkpoint will not be saved as top-k.", + stacklevel=2, + ) + if full_metric_name in rm_save_state: + del rm_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + rm_save_state[full_metric_name] = metrics_source[ + metric_name + ] + + with timer.time("checkpointing"): + print(f"Saving checkpoint for step {total_steps + 1}...") + checkpoint_path = checkpointer.init_tmp_checkpoint( + total_steps + 1, rm_save_state, master_config + ) + + policy.save_checkpoint( + weights_path=os.path.join( + checkpoint_path, "policy", "weights" + ), + optimizer_path=os.path.join( + checkpoint_path, "policy", "optimizer" + ), + tokenizer_path=os.path.join( + checkpoint_path, "policy", "tokenizer" + ), + checkpointing_cfg=master_config["checkpointing"], + ) + torch.save( + train_dataloader.state_dict(), + os.path.join(checkpoint_path, "train_dataloader.pt"), + ) + checkpointer.finalize_checkpoint(checkpoint_path) + + timing_metrics = timer.get_timing_metrics(reduction_op="sum") + + print("\n📊 Training Results:") + for metric_name in RMValMetrics.__annotations__.keys(): + if metric_name != "num_valid_samples": + print(f" • {metric_name}: {float(metrics[metric_name]):.4f}") + else: + print(f" • num valid samples: {float(metrics[metric_name]):.0f}") + + print("\n⏱️ Timing:") + # Display total time first, separately + total_time = timing_metrics.get("total_step_time", 0) + print(f" • Total step time: {total_time:.2f}s") + + # Display all other timing metrics (if any) + for k, v in sorted( + timing_metrics.items(), key=lambda item: item[1], reverse=True + ): + if k != "total_step_time": + percent = (v / total_time * 100) if total_time > 0 else 0 + print(f" • {k}: {v:.2f}s ({percent:.1f}%)") + + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + 
metrics["global_valid_toks"] / total_time / total_num_gpus + ) + logger.log_metrics(metrics, total_steps + 1, prefix="train") + logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") + + timer.reset() + current_step += 1 + total_steps += 1 + + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return + if ( + master_config["rm"]["max_num_steps"] != -1 + and total_steps >= master_config["rm"]["max_num_steps"] + ): + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) + return + + current_epoch += 1 + current_step = 0 # Reset step counter for new epoch diff --git a/nemo_rl/algorithms/sft.py b/nemo_rl/algorithms/sft.py index 804909c2c4..ae93145b83 100644 --- a/nemo_rl/algorithms/sft.py +++ b/nemo_rl/algorithms/sft.py @@ -19,14 +19,15 @@ import numpy as np import torch from torchdata.stateful_dataloader import StatefulDataLoader -from transformers import AutoTokenizer +from transformers import AutoTokenizer, PreTrainedTokenizerBase from nemo_rl.algorithms.loss_functions import ( NLLLoss, ) -from nemo_rl.algorithms.utils import set_seed +from nemo_rl.algorithms.utils import maybe_pad_last_batch, set_seed from nemo_rl.data import DataConfig -from nemo_rl.data.datasets import AllTaskProcessedDataset, rl_collate_fn +from nemo_rl.data.collate_fn import rl_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset from nemo_rl.data.interfaces import TaskDataSpec from nemo_rl.data.llm_message_utils import ( add_loss_mask_to_message_log, @@ -40,7 +41,7 @@ from nemo_rl.utils.checkpoint import CheckpointingConfig, CheckpointManager from nemo_rl.utils.logger import Logger, LoggerConfig from nemo_rl.utils.nsys import maybe_gpu_profile_step -from nemo_rl.utils.timer import Timer +from nemo_rl.utils.timer import TimeoutChecker, Timer class SFTSaveState(TypedDict): @@ -49,6 +50,7 @@ class SFTSaveState(TypedDict): total_steps: int # Track total number of steps 
across all epochs val_loss: NotRequired[float] # Optional field - may not be present during training consumed_samples: int + total_valid_tokens: int # Track total number of non-padding tokens during training def _default_sft_save_state() -> SFTSaveState: @@ -57,6 +59,7 @@ def _default_sft_save_state() -> SFTSaveState: "step": 0, "total_steps": 0, "consumed_samples": 0, + "total_valid_tokens": 0, } @@ -134,9 +137,10 @@ def setup( train_dataloader = StatefulDataLoader( train_dataset, batch_size=policy_config["train_global_batch_size"], - shuffle=True, + shuffle=data_config["shuffle"], collate_fn=rl_collate_fn, drop_last=True, + num_workers=data_config["num_workers"], ) if last_checkpoint_path is not None: @@ -150,7 +154,8 @@ def setup( batch_size=sft_config["val_global_batch_size"], shuffle=False, collate_fn=rl_collate_fn, - drop_last=True, + drop_last=False, + num_workers=data_config["num_workers"], ) # ========================== @@ -171,10 +176,23 @@ def setup( # Training # ========================== print("\n▶ Setting up model...") + if policy_config.get("megatron_cfg", {}).get("enabled", False): + total_train_iters = min( + sft_config["max_num_steps"], + sft_config["max_num_epochs"] * len(train_dataloader), + ) + policy_config["megatron_cfg"]["train_iters"] = total_train_iters + # check if tokenizer is a processor (e.g. 
for VLMs) + processor = None + if not isinstance(tokenizer, PreTrainedTokenizerBase): + processor = tokenizer + tokenizer = processor.tokenizer + policy = Policy( cluster=cluster, config=policy_config, tokenizer=tokenizer, + processor=processor, weights_path=Path(last_checkpoint_path) / "policy" / "weights" if last_checkpoint_path else None, @@ -184,6 +202,9 @@ def setup( init_optimizer=True, init_reference_model=False, ) + # print the node IP and GPU ID of the policy workers for debugging + policy.print_node_ip_and_gpu_id() + loss_fn = NLLLoss() print(" ✓ Model initialized") @@ -221,6 +242,9 @@ def validate( ): """Run validation on the validation dataset.""" if val_dataloader is None: + assert val_dataloader is not None or master_config["dpo"]["val_period"] == 0, ( + "val_dataloader is None, so dpo.val_period must be 0" + ) print(" ⚠️ No validation dataloader provided, skipping validation") return @@ -233,7 +257,7 @@ def validate( # val_total = len(val_dataloader) val_metrics = {"val_loss": 0.0} - num_valid_batches = 0 + sum_num_valid_tokens = 0 policy.prepare_for_training() for batch_idx, val_batch in enumerate(val_dataloader): @@ -260,12 +284,20 @@ def validate( } ) + # update multimodal data + val_data.update(cat_and_padded.get_multimodal_dict(as_tensors=False)) + # When running validation with drop_last=False, we might end up with a partial batch. + # Check if we need to pad the final batch to make it divisible by micro_batch_size * dp_size. + if val_data.size < val_batch_size: + dp_size = policy.sharding_annotations.get_axis_size("data_parallel") + val_data = maybe_pad_last_batch(val_data, dp_size, val_mbs) + ## just run model fwd val_results = policy.train( val_data, loss_fn, eval_mode=True, - gbs=val_batch_size, + gbs=val_data.size, mbs=val_mbs, ) @@ -275,14 +307,17 @@ def validate( " This is likely because there were no valid samples." 
) else: - val_metrics["val_loss"] += float(val_results["loss"]) - num_valid_batches += 1 + num_valid_tokens = ( + val_data["sample_mask"].unsqueeze(-1) * val_data["token_mask"] + ).sum() + val_metrics["val_loss"] += float(val_results["loss"]) * num_valid_tokens + sum_num_valid_tokens += num_valid_tokens if val_batches > 0 and batch_idx >= val_batches - 1: break - if num_valid_batches > 0: - val_metrics["val_loss"] /= num_valid_batches + if sum_num_valid_tokens > 0: + val_metrics["val_loss"] /= sum_num_valid_tokens else: warnings.warn( "No validation metrics were collected." @@ -296,7 +331,7 @@ def validate( timing_metrics = timer.get_timing_metrics(reduction_op="sum") validation_time = timing_metrics.get("total_validation_time", 0) - if num_valid_batches > 0: + if sum_num_valid_tokens > 0: # Print summary of validation results print("\n📊 Validation Results:") print(f" • Validation loss: {val_metrics['val_loss']:.4f}") @@ -326,16 +361,25 @@ def sft_train( ) -> None: # Run basic sft training timer = Timer() + timeout = TimeoutChecker( + timeout=master_config["checkpointing"]["checkpoint_must_save_by"], + fit_last_save_time=True, + ) + timeout.start_iterations() if sft_save_state is None: sft_save_state = _default_sft_save_state() current_epoch = 0 current_step = 0 total_steps = 0 + total_valid_tokens = 0 else: current_epoch = sft_save_state["epoch"] current_step = sft_save_state["step"] total_steps = sft_save_state["total_steps"] + total_valid_tokens = sft_save_state.get( + "total_valid_tokens", 0 + ) # Default to 0 for backward compatibility with older checkpoints sft_config = master_config["sft"] # Validation configuration @@ -403,9 +447,13 @@ def sft_train( "sample_mask": batch["loss_multiplier"], } ) + train_data.update( + cat_and_padded.get_multimodal_dict(as_tensors=False) + ) print("▶ Taking a training step...") - train_results = policy.train(train_data, loss_fn) + with timer.time("policy_training"): + train_results = policy.train(train_data, loss_fn) 
is_last_step = total_steps + 1 >= master_config["sft"][ "max_num_steps" @@ -434,35 +482,68 @@ def sft_train( logger.log_metrics( val_metrics, total_steps + 1, prefix="validation" ) + metrics = { + "loss": train_results["loss"].numpy(), + "grad_norm": train_results["grad_norm"].numpy(), + } + metrics.update(train_results["all_mb_metrics"]) + for k, v in metrics.items(): + if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: + metrics[k] = np.mean(v).item() + else: + metrics[k] = np.sum(v).item() + total_valid_tokens += metrics["global_valid_toks"] ## Checkpointing sft_save_state["consumed_samples"] += master_config["policy"][ "train_global_batch_size" ] - if master_config["checkpointing"]["enabled"] and ( + timeout.mark_iteration() + should_save_by_step = ( is_last_step or (total_steps + 1) % master_config["checkpointing"]["save_period"] == 0 + ) + # +1 because step is 0-indexed + # Check if timeout-based checkpointing is enabled in config. + should_save_by_timeout = timeout.check_save() + + if master_config["checkpointing"]["enabled"] and ( + should_save_by_step or should_save_by_timeout ): - ## +1 because step is 0-indexed sft_save_state["step"] = (current_step + 1) % len(train_dataloader) sft_save_state["total_steps"] = total_steps + 1 sft_save_state["epoch"] = current_epoch - if val_metrics is not None: - sft_save_state["val_loss"] = val_metrics["val_loss"] - elif "val_loss" in sft_save_state: - del sft_save_state["val_loss"] - - if master_config["checkpointing"]["metric_name"] is not None: - if ( - master_config["checkpointing"]["metric_name"] - not in sft_save_state - ): + sft_save_state["total_valid_tokens"] = total_valid_tokens + + full_metric_name = master_config["checkpointing"]["metric_name"] + if full_metric_name is not None: + assert full_metric_name.startswith( + "train:" + ) or full_metric_name.startswith("val:"), ( + f"metric_name={full_metric_name} must start with 'val:' or 'train:',\n" + f'followed by the corresponding name in the "val" 
or "train" metrics dictionary.' + f" If you are using an old config, please updated checkpointing.metric_name to the new format, " + f" e.g. 'val_loss --> 'val:val_loss'" + ) + prefix, metric_name = full_metric_name.split(":", 1) + metrics_source = metrics if prefix == "train" else val_metrics + if not metrics_source: warnings.warn( - f"You asked to save checkpoints based on {master_config['checkpointing']['metric_name']} but the metric is not found in the save state. " - "Saving most recent k checkpoints instead." + f"You asked to save checkpoints based on {metric_name} but no {prefix} metrics were collected. " + "This checkpoint will not be saved as top-k.", + stacklevel=2, ) - master_config["checkpointing"]["metric_name"] = None + if full_metric_name in sft_save_state: + del sft_save_state[full_metric_name] + elif metric_name not in metrics_source: + raise ValueError( + f"Metric {metric_name} not found in {prefix} metrics" + ) + else: + sft_save_state[full_metric_name] = metrics_source[ + metric_name + ] with timer.time("checkpointing"): print(f"Saving checkpoint for step {total_steps + 1}...") @@ -480,6 +561,7 @@ def sft_train( tokenizer_path=os.path.join( checkpoint_path, "policy", "tokenizer" ), + checkpointing_cfg=master_config["checkpointing"], ) torch.save( train_dataloader.state_dict(), @@ -487,21 +569,26 @@ def sft_train( ) checkpointer.finalize_checkpoint(checkpoint_path) - losses = train_results["loss"] - metrics = { - "loss": train_results["loss"].numpy(), - "grad_norm": train_results["grad_norm"].numpy(), - } - metrics.update(train_results["all_mb_metrics"]) - for k, v in metrics.items(): - if k in {"lr", "wd", "global_valid_seqs", "global_valid_toks"}: - metrics[k] = np.mean(v).item() - else: - metrics[k] = np.sum(v).item() timing_metrics = timer.get_timing_metrics(reduction_op="sum") print("\n📊 Training Results:") print(f" • Loss: {float(metrics['loss']):.4f}") + if "total_flops" in train_results: + total_tflops = ( + train_results["total_flops"] + 
/ timing_metrics["policy_training"] + / 1e12 + ) + num_ranks = train_results["num_ranks"] + print( + f" • Training FLOPS: {total_tflops:.2f} TFLOPS ({total_tflops / num_ranks:.2f} TFLOPS per rank)" + ) + if "theoretical_tflops" in train_results: + theoretical_tflops = train_results["theoretical_tflops"] + print( + f" • Training Model Floating Point Utilization: {100 * total_tflops / theoretical_tflops:.2f}%" + ) + metrics["train_fp_utilization"] = total_tflops / theoretical_tflops print("\n⏱️ Timing:") # Display total time first, separately total_time = timing_metrics.get("total_step_time", 0) @@ -515,6 +602,13 @@ def sft_train( percent = (v / total_time * 100) if total_time > 0 else 0 print(f" • {k}: {v:.2f}s ({percent:.1f}%)") + total_num_gpus = ( + master_config["cluster"]["num_nodes"] + * master_config["cluster"]["gpus_per_node"] + ) + timing_metrics["valid_tokens_per_sec_per_gpu"] = ( + metrics["global_valid_toks"] / total_time / total_num_gpus + ) logger.log_metrics(metrics, total_steps + 1, prefix="train") logger.log_metrics(timing_metrics, total_steps + 1, prefix="timing/train") @@ -522,7 +616,14 @@ def sft_train( current_step += 1 total_steps += 1 + if should_save_by_timeout: + print("Timeout has been reached, stopping training early", flush=True) + return if total_steps >= master_config["sft"]["max_num_steps"]: + print( + "Max number of steps has been reached, stopping training early", + flush=True, + ) return current_epoch += 1 diff --git a/nemo_rl/algorithms/utils.py b/nemo_rl/algorithms/utils.py index 6d634e3ceb..1a28f5f690 100644 --- a/nemo_rl/algorithms/utils.py +++ b/nemo_rl/algorithms/utils.py @@ -11,31 +11,68 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ +import math import random import warnings -from functools import wraps +from functools import partial, wraps from typing import Optional import numpy as np import torch -from transformers import AutoTokenizer, PreTrainedTokenizerBase +from transformers import ( + AutoProcessor, + AutoTokenizer, + PreTrainedTokenizerBase, +) -from nemo_rl.data import hf_datasets +from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES from nemo_rl.models.policy import TokenizerConfig -def calculate_kl_penalty_joschu2020( - logprobs_policy: torch.Tensor, logprobs_reference: torch.Tensor +def calculate_kl( + logprobs: torch.Tensor, + logprobs_reference: torch.Tensor, + kl_type: str = "k3", + input_clamp_value: float | None = 20.0, + output_clamp_value: float | None = 10.0, ) -> torch.Tensor: - """Calculates a per-token estimate of the KL Divergence between two log_probs. + """Calculates a per-token estimate of the KL Divergence between two logprobs. + + From Schulman 2020, http://joschu.net/blog/kl-approx.html. - From Schulman 2020, always positive. + Args: + logprobs: torch.Tensor (b, s) + logprobs_reference: torch.Tensor (b, s) + kl_type: Type of KL approximation to use. Valid values: "k1", "k2", "k3". + input_clamp_value: Optional clamping value for logr to prevent numerical instability. + If None, no clamping is applied. + output_clamp_value: Optional clamping value for kl to prevent numerical instability. + If None, no clamping is applied. 
- logprobs_policy: torch.Tensor (b, s) - logprobs_reference: torch.Tensor (b, s) + Returns: + torch.Tensor: Per-token KL penalty values (b, s) """ - r = logprobs_reference - logprobs_policy - return torch.exp(r) - r - 1 + logr = logprobs_reference - logprobs + if input_clamp_value is not None: + logr = logr.clamp(min=-input_clamp_value, max=input_clamp_value) + + if kl_type == "k1": + kl = -logr + + elif kl_type == "k2": + kl = torch.square(logr) / 2 + + elif kl_type == "k3": + kl = torch.exp(logr) - 1 - logr + + else: + raise ValueError(f"Invalid KL type: {kl_type}") + + if output_clamp_value is not None: + kl = kl.clamp(min=-output_clamp_value, max=output_clamp_value) + + return kl def calculate_baseline_and_std_per_prompt( @@ -62,11 +99,12 @@ def calculate_baseline_and_std_per_prompt( baseline = torch.zeros_like(rewards) sq_baseline = torch.zeros_like(rewards) + std = torch.zeros_like(rewards) device_ordinal = rewards.get_device() if device_ordinal == -1: reward_device = torch.device("cpu") else: - reward_device = torch.device(reward_device) + reward_device = torch.device(f"cuda:{device_ordinal}") for i in range(len(unique_prompts)): is_matching_prompt = (prompts == unique_prompts[i]).all(1) @@ -105,8 +143,15 @@ def calculate_baseline_and_std_per_prompt( baseline[prompt_idx] = prompt_baseline sq_baseline[prompt_idx] = prompt_baseline_square + std[prompt_idx] = ( + ( + (prompt_baseline_square - prompt_baseline.square()) + * (num_valid / (num_valid - 1)) + ) + .sqrt() + .nan_to_num(0) + ) - std = (sq_baseline - baseline.square()).sqrt().nan_to_num(0) return baseline, std @@ -144,7 +189,9 @@ def set_seed(seed: int) -> None: torch.cuda.manual_seed_all(seed) -def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase: +def get_tokenizer( + tokenizer_config: TokenizerConfig, get_processor: bool = False +) -> PreTrainedTokenizerBase: """Get the tokenizer and set pad token to eos token if it is not already set. 
This function initializes a tokenizer from the Hugging Face transformers library @@ -160,6 +207,7 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase: - "default": Uses the tokenizer's default template - A custom jinja2 template string If not specified, the tokenizer's default template will be used. + get_processor: Whether to return a processor (via AutoProcessor) instead of a tokenizer. Returns: PreTrainedTokenizerBase: The configured tokenizer instance @@ -198,25 +246,381 @@ def get_tokenizer(tokenizer_config: TokenizerConfig) -> PreTrainedTokenizerBase: Using custom chat template >>> formatted = tokenizer.apply_chat_template(messages, tokenize=False) >>> assert formatted == " START: You are a helpful AI assistant. END. START: Hello! END." + + >>> # Requesting a processor (for multimodal models like Qwen-VL) + >>> config = {"name": "Qwen/Qwen2.5-VL-3B-Instruct"} + >>> processor = get_tokenizer(config, get_processor=True) + No chat template provided, using tokenizer's default + >>> messages = [ + ... {"role": "system", "content": "You are a helpful AI assistant."}, + ... {"role": "user", "content": "Hello!"} + ... ] + >>> formatted = processor.tokenizer.apply_chat_template(messages, tokenize=False) + >>> assert formatted == AutoTokenizer.from_pretrained( + ... "Qwen/Qwen2.5-VL-3B-Instruct", trust_remote_code=True + ... 
).apply_chat_template(messages, tokenize=False) + >>> assert processor.pad_token_id == processor.tokenizer.pad_token_id + >>> ``` """ - tokenizer = AutoTokenizer.from_pretrained( - tokenizer_config["name"], trust_remote_code=True - ) + processor = None + + if get_processor: + processor = AutoProcessor.from_pretrained( + tokenizer_config["name"], trust_remote_code=True, use_fast=True + ) + tokenizer = processor.tokenizer + else: + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_config["name"], trust_remote_code=True + ) + if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token + if "chat_template" in tokenizer_config: if tokenizer_config["chat_template"] is None: print("Using passthrough chat template") - tokenizer.chat_template = ( - hf_datasets.COMMON_CHAT_TEMPLATES.passthrough_prompt_response - ) + tokenizer.chat_template = COMMON_CHAT_TEMPLATES.passthrough_prompt_response elif tokenizer_config["chat_template"].lower() == "default": print("Using tokenizer's default chat template") + elif tokenizer_config["chat_template"].endswith(".jinja"): + # Load template from file + template_path = tokenizer_config["chat_template"] + print(f"Loading chat template from file: {template_path}") + with open(template_path, "r") as f: + tokenizer.chat_template = f.read() else: print("Using custom chat template") tokenizer.chat_template = tokenizer_config["chat_template"] else: print("No chat template provided, using tokenizer's default") - return tokenizer + if ( + "chat_template_kwargs" in tokenizer_config + and tokenizer_config["chat_template_kwargs"] is not None + ): + assert isinstance(tokenizer_config["chat_template_kwargs"], dict), ( + "chat_template_kwargs should be a dictionary" + ) + tokenizer.apply_chat_template = partial( + tokenizer.apply_chat_template, **tokenizer_config["chat_template_kwargs"] + ) + + # The "tokenizer" is passed to the policy workers only to use the pad/eos/bos tokens for extra padding and processing of the tokenized messages. 
That is the only reason it is needed. + # However, the dataloader needs the processor for multimodal data preprocessing, so the processor is needed for the dataloader (only tokenizer is NOT enough). + # Inheriting special keys from the tokenizer is a minimal change that doesn't disturb the rest of the SFT pipeline + if processor is not None: + processor.pad_token = tokenizer.pad_token + processor.eos_token = tokenizer.eos_token + processor.bos_token = tokenizer.bos_token + processor.pad_token_id = tokenizer.pad_token_id + processor.eos_token_id = tokenizer.eos_token_id + processor.bos_token_id = tokenizer.bos_token_id + # copy name_or_path from tokenizer to processor for logging + processor.name_or_path = tokenizer.name_or_path + + return tokenizer if processor is None else processor + + +def maybe_pad_last_batch(batch: dict, dp_size: int, mbs: int) -> dict: + """Pads the given batch so that its size is divisible by (mbs * dp_size). + + Args: + batch (dict): The batch to pad. + dp_size (int): Data parallel size. + mbs (int): Micro batch size. + + Returns: + dict: The padded batch. 
+ """ + min_padding = (math.ceil(batch.size / (mbs * dp_size)) * mbs * dp_size) - batch.size + if min_padding > 0: + print(f"Padding last validation batch with {min_padding} padding samples") + # Pad input_ids + batch["input_ids"] = torch.cat( + [ + batch["input_ids"], + batch["input_ids"][-1].unsqueeze(0).repeat(min_padding, 1), + ] + ) + # Pad input_lengths + batch["input_lengths"] = torch.cat( + [ + batch["input_lengths"], + batch["input_lengths"][-1].unsqueeze(0).repeat(min_padding), + ] + ) + if "token_mask" in batch: + # Pad token_mask + batch["token_mask"] = torch.cat( + [ + batch["token_mask"], + batch["token_mask"][-1].unsqueeze(0).repeat(min_padding, 1), + ] + ) + # Pad sample_mask + batch["sample_mask"] = torch.cat( + [ + batch["sample_mask"], + torch.zeros_like(batch["sample_mask"][-1]) + .unsqueeze(0) + .repeat(min_padding), + ] + ) + + if "reference_policy_logprobs" in batch: + # Pad reference_policy_logprobs + batch["reference_policy_logprobs"] = torch.cat( + [ + batch["reference_policy_logprobs"], + batch["reference_policy_logprobs"][-1] + .unsqueeze(0) + .repeat(min_padding, 1), + ] + ) + return batch + + +def print_performance_metrics( + train_results: dict[str, float], + metrics: dict[str, float], + timing_metrics: dict[str, float], + master_config: dict, +) -> dict[str, float]: + """Print performance metrics for GRPO.""" + + # ===================================================== + # Generate Token Imbalance Visualization + # ===================================================== + def visualize_per_worker_load(per_worker_token_counts: dict[int, int]) -> float: + per_worker_token_counts_list = [ + v for k, v in sorted(per_worker_token_counts.items()) + ] + per_worker_load_ratio = [ + v / max(per_worker_token_counts_list) for v in per_worker_token_counts_list + ] + max_rows_to_print = 100 + print(" • Visualizing Token Imbalance per Generation Worker:") + for i in range(min(len(per_worker_token_counts_list), max_rows_to_print)): + print( + f" - 
Generated Tokens from Worker {i:3.0f}:" + f"{'■' * int(per_worker_load_ratio[i] * 10)}" + f"{'□' * (10 - int(per_worker_load_ratio[i] * 10))}" + f" Count: {per_worker_token_counts_list[i] / 1000:.1f}K" + ) + estimated_idle_ratio = 1 - sum(per_worker_load_ratio) / len( + per_worker_load_ratio + ) + print(f" • Average Token Imbalance: {100 * estimated_idle_ratio:.2f}%") + return estimated_idle_ratio + + print("\n🔍 Performance Metrics:") + performance_metrics = {} + + if "per_worker_token_counts" in metrics: + # Can be a list of each trajectory + if isinstance(metrics["per_worker_token_counts"], list): + per_worker_token_counts = {} + for trajectory_metrics in metrics["per_worker_token_counts"]: + for worker_idx, token_count in trajectory_metrics.items(): + per_worker_token_counts[worker_idx] = ( + per_worker_token_counts.get(worker_idx, 0) + token_count + ) + elif isinstance(metrics["per_worker_token_counts"], dict): + per_worker_token_counts = metrics["per_worker_token_counts"] + else: + per_worker_token_counts = None + + if per_worker_token_counts is not None: + average_token_imbalance = visualize_per_worker_load(per_worker_token_counts) + performance_metrics["average_token_imbalance"] = average_token_imbalance + + if "mean_total_tokens_per_sample" in metrics: + print( + f" • Mean Total Tokens per Sample: {metrics['mean_total_tokens_per_sample']:.2f}" + ) + + # ===================================================== + # Throughputs + # ===================================================== + + policy_and_reference_logprobs_time = timing_metrics["policy_and_reference_logprobs"] + policy_training_time = timing_metrics["policy_training"] + total_time = timing_metrics["total_step_time"] + refit_time = ( + timing_metrics["weight_sync"] + if "weight_sync" in timing_metrics + else timing_metrics["prepare_for_generation/total"] + ) + if "generation" in timing_metrics: # Sync GRPO + generation_time = timing_metrics["generation"] + else: # Async GRPO + # If the training time is 
greater than the generation time, we include the idle time caused by training as part of the generation time. + # if training time > generation time, generation time = training time + # if training time < generation time, generation time = training time + exposed generation time + generation_time = ( + timing_metrics["exposed_generation"] + + timing_metrics["policy_and_reference_logprobs"] + + timing_metrics["policy_training"] + ) + + num_nodes = master_config["cluster"]["num_nodes"] + gpus_per_node = master_config["cluster"]["gpus_per_node"] + total_num_gpus = num_nodes * gpus_per_node + colocated_inference = master_config["policy"]["generation"]["colocated"]["enabled"] + + # Idle Time from Training Worker (Async GRPO only) + if ( + "async_grpo" in master_config and master_config["async_grpo"]["enabled"] + ) and not colocated_inference: + # async grpo + exposed_generation_time = timing_metrics["exposed_generation"] + training_worker_idle_time_ratio = ( + 0 + if exposed_generation_time > 0.1 + else exposed_generation_time + / ( + policy_training_time + + policy_and_reference_logprobs_time + + exposed_generation_time + + refit_time + ) + ) + print( + f" • Training Worker Idle Time Ratio: {100 * training_worker_idle_time_ratio:.2f}%" + ) + performance_metrics["training_worker_idle_time_ratio"] = ( + training_worker_idle_time_ratio + ) + + number_of_samples_per_step = ( + master_config["grpo"]["num_prompts_per_step"] + * master_config["grpo"]["num_generations_per_prompt"] + ) + + if colocated_inference: + training_num_gpus = total_num_gpus + generation_num_gpus = total_num_gpus + else: + generation_num_nodes = ( + master_config["policy"]["generation"]["colocated"]["resources"]["num_nodes"] + or 1 + ) + generation_num_gpus = ( + master_config["policy"]["generation"]["colocated"]["resources"][ + "gpus_per_node" + ] + * generation_num_nodes + ) + training_num_gpus = total_num_gpus - generation_num_gpus + + e2e_samples_per_sec_per_gpu = ( + number_of_samples_per_step / 
total_time / total_num_gpus + ) + + e2e_tokens_per_sec_per_gpu = ( + metrics["total_num_tokens"] / total_time / total_num_gpus + ) + policy_training_tokens_per_sec_per_gpu = ( + metrics["total_num_tokens"] / policy_training_time / training_num_gpus + ) + policy_and_reference_logprobs_tokens_per_sec_per_gpu = ( + metrics["total_num_tokens"] + / policy_and_reference_logprobs_time + / training_num_gpus + ) + training_worker_group_tokens_per_sec_per_gpu = ( + metrics["total_num_tokens"] + / (policy_training_time + policy_and_reference_logprobs_time) + / training_num_gpus + ) + generation_tokens_per_sec_per_gpu = ( + metrics["total_num_tokens"] / generation_time / generation_num_gpus + ) + + print(" • Throughputs (per GPU):") + print(f" - E2E (Samples/sec/gpu): {e2e_samples_per_sec_per_gpu:.2f}") + print(f" - E2E (Tokens/sec/gpu): {e2e_tokens_per_sec_per_gpu:.2f}") + print( + f" - Policy Training (Tokens/sec/gpu): {policy_training_tokens_per_sec_per_gpu:.2f}" + ) + print( + f" - Policy and Reference Logprobs (Tokens/sec/gpu): {policy_and_reference_logprobs_tokens_per_sec_per_gpu:.2f}" + ) + print( + f" - Training Worker Group (Tokens/sec/gpu): {training_worker_group_tokens_per_sec_per_gpu:.2f}" + ) + print( + f" - Generation Worker Group (Tokens/sec/gpu): {generation_tokens_per_sec_per_gpu:.2f}" + ) + + print(" • Throughputs (per Group):") + print( + f" - E2E (Samples/sec): {(e2e_samples_per_sec_per_gpu * total_num_gpus):.2f}" + ) + print( + f" - E2E (Tokens/sec): {(e2e_tokens_per_sec_per_gpu * total_num_gpus):.2f}" + ) + print( + f" - Training Worker Group (Tokens/sec): {(training_worker_group_tokens_per_sec_per_gpu * training_num_gpus):.2f}" + ) + print( + f" - Generation Worker Group (Tokens/sec): {(generation_tokens_per_sec_per_gpu * generation_num_gpus):.2f}" + ) + + # ===================================================== + # FLOPS + # ===================================================== + + if "total_flops" in train_results: + total_tflops = ( + 
train_results["total_flops"] / timing_metrics["policy_training"] / 1e12 + ) + num_ranks = train_results["num_ranks"] + print( + f" • Training FLOPS: {total_tflops:.2f} TFLOPS ({total_tflops / num_ranks:.2f} TFLOPS per rank)", + flush=True, + ) + performance_metrics["train_flops_per_gpu"] = total_tflops / num_ranks + if "theoretical_tflops" in train_results: + theoretical_tflops = train_results["theoretical_tflops"] + print( + f" • Training Model Floating Point Utilization: {100 * total_tflops / theoretical_tflops:.2f}%", + flush=True, + ) + performance_metrics["train_fp_utilization"] = ( + total_tflops / theoretical_tflops + ) + + # ===================================================== + # Clean up metrics + # ===================================================== + + # Clean up metrics to avoid wandb logging errors + # Dict structures cannot be logged to wandb + if "per_worker_token_counts" in metrics: + del metrics["per_worker_token_counts"] + + # ===================================================== + # Logging + # ===================================================== + + performance_metrics.update( + { + "samples_per_sec": e2e_samples_per_sec_per_gpu * total_num_gpus, + "tokens_per_sec": e2e_tokens_per_sec_per_gpu * total_num_gpus, + "samples_per_sec_per_gpu": e2e_samples_per_sec_per_gpu, + "tokens_per_sec_per_gpu": e2e_tokens_per_sec_per_gpu, + "policy_training_tokens_per_sec_per_gpu": policy_training_tokens_per_sec_per_gpu, + "policy_and_reference_logprobs_tokens_per_sec_per_gpu": policy_and_reference_logprobs_tokens_per_sec_per_gpu, + "training_worker_group_tokens_per_sec_per_gpu": training_worker_group_tokens_per_sec_per_gpu, + "generation_tokens_per_sec_per_gpu": generation_tokens_per_sec_per_gpu, + "training_worker_group_tokens_per_sec": training_worker_group_tokens_per_sec_per_gpu + * training_num_gpus, + "generation_tokens_per_sec": generation_tokens_per_sec_per_gpu + * generation_num_gpus, + } + ) + + return performance_metrics diff --git 
a/nemo_rl/converters/__init__.py b/nemo_rl/converters/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nemo_rl/converters/huggingface/__init__.py b/nemo_rl/converters/huggingface/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nemo_rl/converters/megatron/__init__.py b/nemo_rl/converters/megatron/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nemo_rl/data/__init__.py b/nemo_rl/data/__init__.py index 9a9ce4b23a..3e40c9d78c 100644 --- a/nemo_rl/data/__init__.py +++ b/nemo_rl/data/__init__.py @@ -12,24 +12,135 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import NotRequired, TypedDict +from typing import Literal, NotRequired, TypedDict +# TODO: split this typed dict up so it can be PreferenceDataConfig | ResponseDataConfig | etc +# so that we can type check the configs more rigorously as opposed to saying everything +# is not required. class DataConfig(TypedDict): max_input_seq_length: int - prompt_file: NotRequired[str] - system_prompt_file: NotRequired[str] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] dataset_name: str val_dataset_name: NotRequired[str] add_bos: NotRequired[bool] add_eos: NotRequired[bool] input_key: NotRequired[str] - output_key: NotRequired[str] + output_key: NotRequired[str | None] add_generation_prompt: NotRequired[bool] add_system_prompt: NotRequired[bool] - split: NotRequired[str] + split: NotRequired[str | None] + shuffle: bool + seed: NotRequired[int | None] + download_dir: NotRequired[str] + train_data_path: NotRequired[str] + val_data_paths: NotRequired[dict[str, str]] + # Number of data loader workers. + # Set to 8 or 10 for large batches to improve loading speed. + # This saturates CPU threads without consuming too much memory + # However, setting it too high might cause memory issues for long seqlens. 
+ num_workers: NotRequired[int] -class MathDataConfig(DataConfig): +# =============================================================================== +# Eval Dataset Configs +# =============================================================================== +# These configs correspond to the eval datasets in data/datasets/eval_datasets/ +# Note: TypedDict doesn't allow narrowing types in child classes, so each config +# is defined independently with common fields repeated. + + +class MMLUEvalDataConfig(TypedDict): + """Config for MMLU and multilingual MMLU datasets. + + Supports dataset_name: "mmlu" or "mmlu_{language}" where language is one of: + AR-XY, BN-BD, DE-DE, EN-US, ES-LA, FR-FR, HI-IN, ID-ID, IT-IT, JA-JP, + KO-KR, PT-BR, ZH-CN, SW-KE, YO-NG + """ + + max_input_seq_length: int + dataset_name: Literal[ + "mmlu", + "mmlu_AR-XY", + "mmlu_BN-BD", + "mmlu_DE-DE", + "mmlu_EN-US", + "mmlu_ES-LA", + "mmlu_FR-FR", + "mmlu_HI-IN", + "mmlu_ID-ID", + "mmlu_IT-IT", + "mmlu_JA-JP", + "mmlu_KO-KR", + "mmlu_PT-BR", + "mmlu_ZH-CN", + "mmlu_SW-KE", + "mmlu_YO-NG", + ] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +class MMLUProEvalDataConfig(TypedDict): + """Config for MMLU Pro dataset.""" + + max_input_seq_length: int + dataset_name: Literal["mmlu_pro"] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +class AIMEEvalDataConfig(TypedDict): + """Config for AIME datasets.""" + + max_input_seq_length: int + dataset_name: Literal["aime2024", "aime2025"] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +class GPQAEvalDataConfig(TypedDict): + """Config for GPQA datasets.""" + + max_input_seq_length: int + dataset_name: Literal["gpqa", "gpqa_diamond"] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +class MathEvalDataConfig(TypedDict): + """Config for Math datasets.""" + + max_input_seq_length: int + dataset_name: 
Literal["math", "math500"] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +class LocalMathEvalDataConfig(TypedDict): + """Config for local math datasets loaded from files. + + dataset_name can be a URL or local file path. + Requires additional fields: problem_key, solution_key, file_format, split. + """ + + max_input_seq_length: int + dataset_name: str # URL or file path problem_key: str solution_key: str + file_format: Literal["csv", "json"] + split: NotRequired[str | None] + prompt_file: NotRequired[str | None] + system_prompt_file: NotRequired[str | None] + + +# Union type for all eval dataset configs +EvalDataConfigType = ( + MMLUEvalDataConfig + | MMLUProEvalDataConfig + | AIMEEvalDataConfig + | GPQAEvalDataConfig + | MathEvalDataConfig + | LocalMathEvalDataConfig +) diff --git a/nemo_rl/data/hf_datasets/chat_templates.py b/nemo_rl/data/chat_templates.py similarity index 100% rename from nemo_rl/data/hf_datasets/chat_templates.py rename to nemo_rl/data/chat_templates.py diff --git a/nemo_rl/data/datasets.py b/nemo_rl/data/collate_fn.py similarity index 57% rename from nemo_rl/data/datasets.py rename to nemo_rl/data/collate_fn.py index 3542aa1e42..e1ca489455 100644 --- a/nemo_rl/data/datasets.py +++ b/nemo_rl/data/collate_fn.py @@ -11,107 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, Optional, Union +from typing import Any, Union import torch -from datasets import Dataset -from transformers import PreTrainedTokenizerBase +from transformers import AutoProcessor, PreTrainedTokenizerBase -from nemo_rl.data.interfaces import ( - DatumSpec, - DPODatumSpec, - TaskDataProcessFnCallable, - TaskDataSpec, -) +from nemo_rl.data.interfaces import DatumSpec, DPODatumSpec from nemo_rl.data.llm_message_utils import ( add_loss_mask_to_message_log, batched_message_log_to_flat_message, ) from nemo_rl.distributed.batched_data_dict import BatchedDataDict -TokenizerType = PreTrainedTokenizerBase - - -# TODO @sahilj handle too-long prompts and masking them out throughout the whole process and renormalizing on loss -class AllTaskProcessedDataset: - """Dataset for processing single or multi-task data with task-specific tokenization and processing. - - Args: - dataset: Input dataset containing raw data - tokenizer: Tokenizer for text processing - default_task_data_spec: Default task processing specifications. - In the case of single-task, this is the spec used for processing all entries. - In the case of multi-task, any values not specified in the task-specific specs will be taken from the default spec. 
- task_data_processors: Either a single TaskDataProcessFnCallable for single-task, - or a dict mapping task names to (TaskDataSpec, TaskDataProcessFnCallable) for multi-task - max_seq_length: Maximum sequence length for tokenized outputs - """ - - def __init__( - self, - dataset: Dataset | Any, - tokenizer: TokenizerType, - default_task_data_spec: TaskDataSpec, - task_data_processors: ( - dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] - | TaskDataProcessFnCallable - ), - max_seq_length: Optional[int] = None, - ): - self.dataset = dataset - self.tokenizer = tokenizer - self.default_task_data_spec = default_task_data_spec - self.task_data_processors = task_data_processors - self.max_seq_length = max_seq_length - - if isinstance(task_data_processors, dict): - # apply defaults to all task data specs - for task_name, ( - task_data_spec, - task_data_processor, - ) in task_data_processors.items(): - task_data_spec.copy_defaults(self.default_task_data_spec) - - def __len__(self) -> int: - return len(self.dataset) - - def encode_single( - self, text: Union[str, list[str]] - ) -> tuple[list[int] | torch.Tensor, int]: - """Takes either a single string or a list of strings that represent multiple turns for the same conversation. - - Returns a single (concatenated) list of tokenized ids and the length of the tokenized ids. 
- """ - if isinstance(text, str): - text_ids = self.tokenizer.text_to_ids(text) - return text_ids, len(text_ids) - elif isinstance(text, list): - text_ids = [self.tokenizer.text_to_ids(t) for t in text] - return torch.cat(text_ids), sum(len(t) for t in text_ids) - else: - raise ValueError( - f"text must be a string or a list of strings, got {type(text)}" - ) - - def __getitem__(self, idx: int) -> DatumSpec: - """Return a single prompt.""" - entry = self.dataset[idx] - - if isinstance(self.task_data_processors, dict): - task_name = entry["task_name"] - - assert task_name in self.task_data_processors, ( - f"task processor not provided for {task_name}. Provided processors: {self.task_data_processors.keys()}" - ) - task_data_spec, task_data_processor = self.task_data_processors[task_name] - else: - task_data_spec = self.default_task_data_spec - task_data_processor = self.task_data_processors - - datum_spec = task_data_processor( - entry, task_data_spec, self.tokenizer, self.max_seq_length, idx - ) - return datum_spec +TokenizerType = Union[PreTrainedTokenizerBase, AutoProcessor] def rl_collate_fn(data_batch: list[DatumSpec]) -> BatchedDataDict[Any]: @@ -133,6 +45,20 @@ def rl_collate_fn(data_batch: list[DatumSpec]) -> BatchedDataDict[Any]: # Extract stop_strings if present stop_strings = [datum.get("stop_strings", None) for datum in data_batch] + # check if any of the data batch has vllm content and images + extra_args = {} + if any( + [datum_spec.get("vllm_content", None) is not None for datum_spec in data_batch] + ): + vllm_content = [ + datum_spec.get("vllm_content", None) for datum_spec in data_batch + ] + vllm_images = [datum_spec.get("vllm_images", []) for datum_spec in data_batch] + vllm_videos = [datum_spec.get("vllm_videos", []) for datum_spec in data_batch] + extra_args["vllm_content"] = vllm_content + extra_args["vllm_images"] = vllm_images + extra_args["vllm_videos"] = vllm_videos + output: BatchedDataDict[Any] = BatchedDataDict( message_log=message_log, 
length=length, @@ -142,6 +68,7 @@ def rl_collate_fn(data_batch: list[DatumSpec]) -> BatchedDataDict[Any]: idx=idx, batch_max_length=batch_max_length, stop_strings=stop_strings, + **extra_args, ) return output @@ -161,7 +88,7 @@ def eval_collate_fn(data_batch: list[DatumSpec]) -> BatchedDataDict[Any]: Examples: ```{doctest} >>> import torch - >>> from nemo_rl.data.datasets import eval_collate_fn + >>> from nemo_rl.data.collate_fn import eval_collate_fn >>> from nemo_rl.data.interfaces import DatumSpec >>> data_batch = [ ... DatumSpec( @@ -197,16 +124,25 @@ def eval_collate_fn(data_batch: list[DatumSpec]) -> BatchedDataDict[Any]: return output -def dpo_collate_fn( +def preference_collate_fn( data_batch: list[DPODatumSpec], tokenizer: TokenizerType, make_sequence_length_divisible_by: int, + add_loss_mask: bool, ) -> BatchedDataDict[Any]: - """Collate function for DPO training. + """Collate function for preference data training. This function separates the chosen and rejected responses to create two examples per prompt. The chosen and rejected examples are interleaved along the batch dimension, resulting in a batch size of 2 * len(data_batch). + + Args: + data_batch: List of data samples with message_log_chosen, message_log_rejected, length_chosen, length_rejected, loss_multiplier, idx, and task_name fields. + tokenizer: Tokenizer for text processing + make_sequence_length_divisible_by: Make the sequence length divisible by this value + add_loss_mask: Whether to add a token_mask to the returned data + Returns: + BatchedDataDict with input_ids, input_lengths, token_mask (optional), and sample_mask fields. 
""" message_log = [] length = [] @@ -236,11 +172,11 @@ def dpo_collate_fn( batch_max_length=batch_max_length, ) - ## add loss mask based on role to every message - add_loss_mask_to_message_log( - batch["message_log"], - only_unmask_final=True, - ) + if add_loss_mask: + add_loss_mask_to_message_log( + batch["message_log"], + only_unmask_final=True, + ) cat_and_padded, input_lengths = batched_message_log_to_flat_message( batch["message_log"], @@ -248,13 +184,14 @@ def dpo_collate_fn( make_sequence_length_divisible_by=make_sequence_length_divisible_by, ) - train_data: BatchedDataDict[Any] = BatchedDataDict( + data: BatchedDataDict[Any] = BatchedDataDict( { "input_ids": cat_and_padded["token_ids"], "input_lengths": input_lengths, - "token_mask": cat_and_padded["token_loss_mask"], - "sample_mask": loss_multiplier_batch, + "sample_mask": batch["loss_multiplier"], } ) + if add_loss_mask: + data["token_mask"] = cat_and_padded["token_loss_mask"] - return train_data + return data diff --git a/3rdparty/NeMo-workspace/is_nemo_installed.py b/nemo_rl/data/datasets/__init__.py similarity index 54% rename from 3rdparty/NeMo-workspace/is_nemo_installed.py rename to nemo_rl/data/datasets/__init__.py index 4eeadc0006..f859705dba 100644 --- a/3rdparty/NeMo-workspace/is_nemo_installed.py +++ b/nemo_rl/data/datasets/__init__.py @@ -11,17 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import contextlib -import io +from nemo_rl.data.datasets.eval_datasets import load_eval_dataset +from nemo_rl.data.datasets.preference_datasets import load_preference_dataset +from nemo_rl.data.datasets.processed_dataset import AllTaskProcessedDataset +from nemo_rl.data.datasets.response_datasets import load_response_dataset +from nemo_rl.data.datasets.utils import assert_no_double_bos -try: - with ( - contextlib.redirect_stdout(io.StringIO()), - contextlib.redirect_stderr(io.StringIO()), - ): - # Silence the logging because NeMo is very verbose - from nemo.tron.init import initialize_megatron # noqa: F401 - INSTALLED = True -except ImportError: - INSTALLED = False -print(f"NeMo {INSTALLED=}") +__all__ = [ + "AllTaskProcessedDataset", + "load_eval_dataset", + "load_preference_dataset", + "load_response_dataset", + "assert_no_double_bos", +] diff --git a/nemo_rl/data/eval_datasets/__init__.py b/nemo_rl/data/datasets/eval_datasets/__init__.py similarity index 77% rename from nemo_rl/data/eval_datasets/__init__.py rename to nemo_rl/data/datasets/eval_datasets/__init__.py index e99a7c6af2..8386286c83 100644 --- a/nemo_rl/data/eval_datasets/__init__.py +++ b/nemo_rl/data/datasets/eval_datasets/__init__.py @@ -12,17 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from nemo_rl.data.eval_datasets.aime2024 import AIME2024Dataset -from nemo_rl.data.eval_datasets.gpqa import GPQADataset -from nemo_rl.data.eval_datasets.local_math_dataset import LocalMathDataset -from nemo_rl.data.eval_datasets.math import MathDataset -from nemo_rl.data.eval_datasets.mmlu import MMLUDataset -from nemo_rl.data.eval_datasets.mmlu_pro import MMLUProDataset +from nemo_rl.data.datasets.eval_datasets.aime import AIMEDataset +from nemo_rl.data.datasets.eval_datasets.gpqa import GPQADataset +from nemo_rl.data.datasets.eval_datasets.local_math_dataset import LocalMathDataset +from nemo_rl.data.datasets.eval_datasets.math import MathDataset +from nemo_rl.data.datasets.eval_datasets.mmlu import MMLUDataset +from nemo_rl.data.datasets.eval_datasets.mmlu_pro import MMLUProDataset def load_eval_dataset(data_config): """Loads evaluation dataset.""" dataset_name = data_config["dataset_name"] + + # mmlu if dataset_name.startswith("mmlu") and dataset_name != "mmlu_pro": if dataset_name == "mmlu": base_dataset = MMLUDataset( @@ -36,11 +38,25 @@ def load_eval_dataset(data_config): prompt_file=data_config["prompt_file"], system_prompt_file=data_config["system_prompt_file"], ) + elif dataset_name == "mmlu_pro": + base_dataset = MMLUProDataset( + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + # aime elif dataset_name == "aime2024": - base_dataset = AIME2024Dataset( + base_dataset = AIMEDataset( + variant="2024", + prompt_file=data_config["prompt_file"], + system_prompt_file=data_config["system_prompt_file"], + ) + elif dataset_name == "aime2025": + base_dataset = AIMEDataset( + variant="2025", prompt_file=data_config["prompt_file"], system_prompt_file=data_config["system_prompt_file"], ) + # gpqa elif dataset_name == "gpqa": base_dataset = GPQADataset( variant="main", @@ -53,11 +69,7 @@ def load_eval_dataset(data_config): prompt_file=data_config["prompt_file"], system_prompt_file=data_config["system_prompt_file"], 
) - elif dataset_name == "mmlu_pro": - base_dataset = MMLUProDataset( - prompt_file=data_config["prompt_file"], - system_prompt_file=data_config["system_prompt_file"], - ) + # math elif dataset_name == "math": base_dataset = MathDataset( variant="math_test", @@ -70,10 +82,11 @@ def load_eval_dataset(data_config): prompt_file=data_config["prompt_file"], system_prompt_file=data_config["system_prompt_file"], ) - elif dataset_name == "local": + # fall back to local dataset + else: + print(f"Loading dataset from {dataset_name}...") base_dataset = LocalMathDataset( - name=dataset_name, - data_paths=data_config["data_paths"], + data_path=dataset_name, problem_key=data_config["problem_key"], solution_key=data_config["solution_key"], file_format=data_config["file_format"], @@ -81,13 +94,12 @@ def load_eval_dataset(data_config): prompt_file=data_config["prompt_file"], system_prompt_file=data_config["system_prompt_file"], ) - else: - raise ValueError(f"Unknown dataset {dataset_name}.") + return base_dataset __all__ = [ - "AIME2024Dataset", + "AIMEDataset", "GPQADataset", "LocalMathDataset", "MathDataset", diff --git a/nemo_rl/data/eval_datasets/aime2024.py b/nemo_rl/data/datasets/eval_datasets/aime.py similarity index 60% rename from nemo_rl/data/eval_datasets/aime2024.py rename to nemo_rl/data/datasets/eval_datasets/aime.py index 9e585bb511..671b410a69 100644 --- a/nemo_rl/data/eval_datasets/aime2024.py +++ b/nemo_rl/data/datasets/eval_datasets/aime.py @@ -12,26 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""AIME 2024 dataset.""" +"""AIME dataset.""" -from typing import Any, Optional +from typing import Any, Literal, Optional -from datasets import load_dataset +from datasets import concatenate_datasets, load_dataset from nemo_rl.data import processors from nemo_rl.data.interfaces import TaskDataSpec -class AIME2024Dataset: +class AIMEDataset: def __init__( self, + variant: Literal["2024", "2025"] = "2025", prompt_file: Optional[str] = None, system_prompt_file: Optional[str] = None, ): - ds = load_dataset("HuggingFaceH4/aime_2024", split="train") + if variant == "2024": + ds = load_dataset("HuggingFaceH4/aime_2024", split="train") + self.input_key = "problem" + elif variant == "2025": + ds0 = load_dataset("opencompass/AIME2025", "AIME2025-I", split="test") + ds1 = load_dataset("opencompass/AIME2025", "AIME2025-II", split="test") + ds = concatenate_datasets([ds0, ds1]) + self.input_key = "question" + else: + raise ValueError(f"Invalid variant for aime dataset: aime{variant}") + self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) self.task_spec = TaskDataSpec( - task_name="aime2024", + task_name=f"aime{variant}", prompt_file=prompt_file, system_prompt_file=system_prompt_file, ) @@ -39,6 +50,6 @@ def __init__( def _rekey(self, data: dict[str, Any]): return { - "problem": data["problem"], + "problem": data[self.input_key], "expected_answer": data["answer"], } diff --git a/nemo_rl/data/eval_datasets/gpqa.py b/nemo_rl/data/datasets/eval_datasets/gpqa.py similarity index 100% rename from nemo_rl/data/eval_datasets/gpqa.py rename to nemo_rl/data/datasets/eval_datasets/gpqa.py diff --git a/nemo_rl/data/eval_datasets/local_math_dataset.py b/nemo_rl/data/datasets/eval_datasets/local_math_dataset.py similarity index 91% rename from nemo_rl/data/eval_datasets/local_math_dataset.py rename to nemo_rl/data/datasets/eval_datasets/local_math_dataset.py index 2810899b4a..7ea90ca498 100644 --- a/nemo_rl/data/eval_datasets/local_math_dataset.py +++ 
b/nemo_rl/data/datasets/eval_datasets/local_math_dataset.py @@ -14,6 +14,7 @@ """Local math dataset.""" +import os from typing import Any, Literal, Optional from datasets import load_dataset @@ -25,23 +26,22 @@ class LocalMathDataset: def __init__( self, - data_paths: str | list[str], + data_path: str, problem_key: str, solution_key: str, - name: str, split: Optional[str] = None, file_format: Literal["csv", "json"] = "csv", prompt_file: Optional[str] = None, system_prompt_file: Optional[str] = None, ): - ds = load_dataset(file_format, data_files=data_paths) + ds = load_dataset(file_format, data_files=data_path) if split is not None: ds = ds[split] self._problem_key = problem_key self._solution_key = solution_key self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names) self.task_spec = TaskDataSpec( - task_name=name, + task_name=os.path.basename(data_path).split(".")[0], prompt_file=prompt_file, system_prompt_file=system_prompt_file, ) diff --git a/nemo_rl/data/eval_datasets/math.py b/nemo_rl/data/datasets/eval_datasets/math.py similarity index 100% rename from nemo_rl/data/eval_datasets/math.py rename to nemo_rl/data/datasets/eval_datasets/math.py diff --git a/nemo_rl/data/eval_datasets/mmlu.py b/nemo_rl/data/datasets/eval_datasets/mmlu.py similarity index 100% rename from nemo_rl/data/eval_datasets/mmlu.py rename to nemo_rl/data/datasets/eval_datasets/mmlu.py diff --git a/nemo_rl/data/eval_datasets/mmlu_pro.py b/nemo_rl/data/datasets/eval_datasets/mmlu_pro.py similarity index 100% rename from nemo_rl/data/eval_datasets/mmlu_pro.py rename to nemo_rl/data/datasets/eval_datasets/mmlu_pro.py diff --git a/nemo_rl/data/datasets/preference_datasets/__init__.py b/nemo_rl/data/datasets/preference_datasets/__init__.py new file mode 100644 index 0000000000..6593d0bd7a --- /dev/null +++ b/nemo_rl/data/datasets/preference_datasets/__init__.py @@ -0,0 +1,86 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nemo_rl.data.datasets.preference_datasets.binary_preference_dataset import ( + BinaryPreferenceDataset, +) +from nemo_rl.data.datasets.preference_datasets.helpsteer3 import HelpSteer3Dataset +from nemo_rl.data.datasets.preference_datasets.preference_dataset import ( + PreferenceDataset, +) +from nemo_rl.data.datasets.preference_datasets.tulu3 import Tulu3PreferenceDataset +from nemo_rl.data.datasets.utils import get_extra_kwargs + + +def load_preference_dataset(data_config): + """Loads preference dataset.""" + dataset_name = data_config["dataset_name"] + + if dataset_name == "HelpSteer3": + base_dataset = HelpSteer3Dataset() + elif dataset_name == "Tulu3Preference": + base_dataset = Tulu3PreferenceDataset() + # fall back to load from JSON file + elif dataset_name == "BinaryPreferenceDataset": + if "train_data_path" not in data_config: + raise ValueError( + "train_data_path is required for dataset_name=BinaryPreferenceDataset." + ) + extra_kwargs = get_extra_kwargs( + data_config, + [ + "val_data_path", + "prompt_key", + "chosen_key", + "rejected_key", + "train_split", + "val_split", + ], + ) + base_dataset = BinaryPreferenceDataset( + train_data_path=data_config["train_data_path"], + **extra_kwargs, + ) + elif dataset_name == "PreferenceDataset": + if "train_data_path" not in data_config: + raise ValueError( + "train_data_path is required for dataset_name=PreferenceDataset." 
+ ) + extra_kwargs = get_extra_kwargs( + data_config, + [ + "val_data_path", + "train_split", + "val_split", + ], + ) + base_dataset = PreferenceDataset( + train_data_path=data_config["train_data_path"], + **extra_kwargs, + ) + else: + raise ValueError( + f"Unsupported {dataset_name=}. " + "Please either set dataset_name in {'HelpSteer3', 'Tulu3Preference'} to use a built-in dataset " + "or set dataset_name in {'PreferenceDataset', 'BinaryPreferenceDataset'} to load from local JSONL file or HuggingFace." + ) + + return base_dataset + + +__all__ = [ + "BinaryPreferenceDataset", + "HelpSteer3Dataset", + "PreferenceDataset", + "Tulu3PreferenceDataset", +] diff --git a/nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py b/nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py new file mode 100644 index 0000000000..1a166cdd7e --- /dev/null +++ b/nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py @@ -0,0 +1,110 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Optional + +from nemo_rl.data.datasets.utils import load_dataset_from_path +from nemo_rl.data.interfaces import TaskDataSpec + + +def to_preference_data_format( + data: dict[str, Any], prompt_key: str, chosen_key: str, rejected_key: str +) -> dict[str, list[dict[str, Any]]]: + return { + "context": data[prompt_key] + if isinstance(data[prompt_key], list) + else [{"role": "user", "content": data[prompt_key]}], + "completions": [ + { + "rank": 0, + "completion": [{"role": "assistant", "content": data[chosen_key]}], + }, + { + "rank": 1, + "completion": [{"role": "assistant", "content": data[rejected_key]}], + }, + ], + } + + +class BinaryPreferenceDataset: + """Dataset class for binary preference data which can be loaded from a JSON file. + + This class handles loading of preference data for DPO and RM training. + It will be converted to the format of PreferenceDataset through the `to_preference_data_format` function. + + The input JSONL files should contain valid JSON objects formatted like this: + { + prompt_key: str, # The input prompt/context + chosen_key: str, # The preferred/winning response + rejected_key: str, # The non-preferred/losing response + } + + Args: + train_data_path: Path to the JSON file containing training data + val_data_path: Path to the JSON file containing validation data + prompt_key: Key for the input prompt/context, default is "prompt" + chosen_key: Key for the preferred/winning response, default is "chosen" + rejected_key: Key for the non-preferred/losing response, default is "rejected" + train_split: Split name for the training data, used for HuggingFace datasets, default is None + val_split: Split name for the validation data, used for HuggingFace datasets, default is None + """ + + def __init__( + self, + train_data_path: str, + val_data_path: Optional[str] = None, + prompt_key: str = "prompt", + chosen_key: str = "chosen", + rejected_key: str = "rejected", + train_split: Optional[str] = None, + val_split: 
Optional[str] = None, + ): + self.prompt_key = prompt_key + self.chosen_key = chosen_key + self.rejected_key = rejected_key + + # load from json file or huggingface + train_ds = load_dataset_from_path(train_data_path, train_split) + if val_data_path: + val_ds = load_dataset_from_path(val_data_path, val_split) + else: + val_ds = None + + # format the dataset + # convert to PreferenceDataset format + train_ds = train_ds.map( + to_preference_data_format, + fn_kwargs={ + "prompt_key": prompt_key, + "chosen_key": chosen_key, + "rejected_key": rejected_key, + }, + ) + if val_ds: + val_ds = val_ds.map( + to_preference_data_format, + fn_kwargs={ + "prompt_key": prompt_key, + "chosen_key": chosen_key, + "rejected_key": rejected_key, + }, + ) + + # store the formatted dataset + self.formatted_ds = { + "train": train_ds, + "validation": val_ds, + } + + self.task_spec = TaskDataSpec(task_name="BinaryPreferenceDataset") diff --git a/nemo_rl/data/hf_datasets/helpsteer3.py b/nemo_rl/data/datasets/preference_datasets/helpsteer3.py similarity index 74% rename from nemo_rl/data/hf_datasets/helpsteer3.py rename to nemo_rl/data/datasets/preference_datasets/helpsteer3.py index 7d694c4c06..e80fbff302 100644 --- a/nemo_rl/data/hf_datasets/helpsteer3.py +++ b/nemo_rl/data/datasets/preference_datasets/helpsteer3.py @@ -19,7 +19,11 @@ from nemo_rl.data.interfaces import TaskDataSpec -def format_helpsteer3(data: dict[str, Any]) -> dict[str, str | dict[str, str]]: +def to_preference_data_format( + data: dict[str, Any], +) -> dict[ + str, list[dict[str, int | list[dict[str, str | Any]]]] | list[dict[str, str]] +]: response_1 = data["response1"] response_2 = data["response2"] overall_preference = data["overall_preference"] @@ -40,9 +44,13 @@ def format_helpsteer3(data: dict[str, Any]) -> dict[str, str | dict[str, str]]: rejected = response_1 return { - "prompt": data["context"], - "chosen_response": chosen, - "rejected_response": rejected, + "context": [{"role": "user", "content": 
data["context"]}] + if isinstance(data["context"], str) + else data["context"], + "completions": [ + {"rank": 0, "completion": [{"role": "assistant", "content": chosen}]}, + {"rank": 1, "completion": [{"role": "assistant", "content": rejected}]}, + ], } @@ -51,7 +59,7 @@ class HelpSteer3Dataset: def __init__(self) -> None: ds = load_dataset("nvidia/HelpSteer3", "preference") - self.formatted_ds = ds.map(format_helpsteer3) + self.formatted_ds = ds.map(to_preference_data_format) self.task_spec = TaskDataSpec( task_name="HelpSteer3", diff --git a/nemo_rl/data/datasets/preference_datasets/preference_dataset.py b/nemo_rl/data/datasets/preference_datasets/preference_dataset.py new file mode 100644 index 0000000000..a168fc2360 --- /dev/null +++ b/nemo_rl/data/datasets/preference_datasets/preference_dataset.py @@ -0,0 +1,61 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from typing import Optional + +from nemo_rl.data.datasets.utils import load_dataset_from_path +from nemo_rl.data.interfaces import TaskDataSpec + + +class PreferenceDataset: + """Dataset class for preference data which can be loaded from a JSON file. + + This class handles loading of preference data for DPO and RM training. 
+ The input JSONL files should contain valid JSON objects formatted like this: + { + "context": list of dicts, # The prompt message (including previous turns, if any) + "completions": list of dicts, # The list of completions + { + "rank": int, # The rank of the completion (lower rank is preferred) + "completion": list of dicts, # The completion message(s) + } + } + + Args: + train_data_path: Path to the JSON file containing training data + val_data_path: Path to the JSON file containing validation data + train_split: Split name for the training data, used for HuggingFace datasets, default is None + val_split: Split name for the validation data, used for HuggingFace datasets, default is None + """ + + def __init__( + self, + train_data_path: str, + val_data_path: Optional[str] = None, + train_split: Optional[str] = None, + val_split: Optional[str] = None, + ): + # load from json file or huggingface + train_ds = load_dataset_from_path(train_data_path, train_split) + if val_data_path: + val_ds = load_dataset_from_path(val_data_path, val_split) + else: + val_ds = None + + # store the formatted dataset + self.formatted_ds = { + "train": train_ds, + "validation": val_ds, + } + + self.task_spec = TaskDataSpec(task_name="PreferenceDataset") diff --git a/nemo_rl/data/datasets/preference_datasets/tulu3.py b/nemo_rl/data/datasets/preference_datasets/tulu3.py new file mode 100644 index 0000000000..20e381da7e --- /dev/null +++ b/nemo_rl/data/datasets/preference_datasets/tulu3.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from typing import Any + +from datasets import load_dataset + +from nemo_rl.data.interfaces import TaskDataSpec + + +def to_preference_data_format( + data: dict[str, Any], +) -> dict[ + str, list[dict[str, int | list[dict[str, str | Any]]]] | list[dict[str, str]] +]: + chosen_conversation = data["chosen"] + rejected_conversation = data["rejected"] + + context = chosen_conversation[:-1] + + # We assume that except last assistant response, all messages in + # chosen and rejected conversations are similar. Validating this... + assert json.dumps(context, ensure_ascii=False) == json.dumps( + rejected_conversation[:-1], ensure_ascii=False + ), ( + f"Context mismatch.\n\nchosen: {chosen_conversation}\n\n rejected: {rejected_conversation}" + ) + + # We assume that last response is always from the assistant. Validating this... + assert chosen_conversation[-1]["role"] == "assistant", ( + f"The last chosen response ({chosen_conversation[-1]}) is not from assistant!" + ) + assert rejected_conversation[-1]["role"] == "assistant", ( + f"The last rejected response ({rejected_conversation[-1]}) is not from assistant!" 
+ ) + + chosen_response = chosen_conversation[-1]["content"] + rejected_response = rejected_conversation[-1]["content"] + + return { + "context": context, + "completions": [ + { + "rank": 0, + "completion": [{"role": "assistant", "content": chosen_response}], + }, + { + "rank": 1, + "completion": [{"role": "assistant", "content": rejected_response}], + }, + ], + } + + +class Tulu3PreferenceDataset: + """Tulu3 preference dataset for DPO training.""" + + def __init__(self) -> None: + ds = load_dataset( + path="allenai/llama-3.1-tulu-3-8b-preference-mixture", + trust_remote_code=True, + ) + self.formatted_ds = ds.map(to_preference_data_format) + # Tulu3 preference dataset has no validation set + self.formatted_ds["validation"] = None + + self.task_spec = TaskDataSpec( + task_name="Tulu3Preference", + ) diff --git a/nemo_rl/data/datasets/processed_dataset.py b/nemo_rl/data/datasets/processed_dataset.py new file mode 100644 index 0000000000..906ab591fc --- /dev/null +++ b/nemo_rl/data/datasets/processed_dataset.py @@ -0,0 +1,126 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from typing import Any, Optional, Union

import torch
from datasets import Dataset
from transformers import AutoProcessor, PreTrainedTokenizerBase

from nemo_rl.data.datasets.utils import assert_no_double_bos
from nemo_rl.data.interfaces import (
    DatumSpec,
    TaskDataProcessFnCallable,
    TaskDataSpec,
)

# Accepted tokenizer flavors: a plain HF tokenizer or a multimodal processor.
TokenizerType = Union[PreTrainedTokenizerBase, AutoProcessor]


# TODO @sahilj handle too-long prompts and masking them out throughout the whole process and renormalizing on loss
class AllTaskProcessedDataset:
    """Dataset for processing single or multi-task data with task-specific tokenization and processing.

    Args:
        dataset: Input dataset containing raw data
        tokenizer: Tokenizer for text processing
        default_task_data_spec: Default task processing specifications.
            In the case of single-task, this is the spec used for processing all entries.
            In the case of multi-task, any values not specified in the task-specific specs will be taken from the default spec.
        task_data_processors: Either a single TaskDataProcessFnCallable for single-task,
            or a dict mapping task names to (TaskDataSpec, TaskDataProcessFnCallable) for multi-task
        max_seq_length: Maximum sequence length for tokenized outputs
    """

    def __init__(
        self,
        dataset: Dataset | Any,
        tokenizer: TokenizerType,
        default_task_data_spec: TaskDataSpec,
        task_data_processors: (
            dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]]
            | TaskDataProcessFnCallable
        ),
        max_seq_length: Optional[int] = None,
    ):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.default_task_data_spec = default_task_data_spec
        self.task_data_processors = task_data_processors
        self.max_seq_length = max_seq_length
        # One-shot guard for the double-BOS sanity check in __getitem__.
        self._bos_checked = False

        if isinstance(task_data_processors, dict):
            # apply defaults to all task data specs
            # (only the spec half of each (spec, processor) tuple is used here)
            for task_name, (
                task_data_spec,
                task_data_processor,
            ) in task_data_processors.items():
                task_data_spec.copy_defaults(self.default_task_data_spec)

    def __len__(self) -> int:
        # Length is delegated to the wrapped dataset.
        return len(self.dataset)

    def encode_single(
        self, text: Union[str, list[str]]
    ) -> tuple[list[int] | torch.Tensor, int]:
        """Takes either a single string or a list of strings that represent multiple turns for the same conversation.

        Returns a single (concatenated) list of tokenized ids and the length of the tokenized ids.
        """
        # NOTE(review): relies on tokenizer.text_to_ids(), which is not part
        # of the standard HF tokenizer API — confirm the tokenizers used
        # here provide it. In the list branch, torch.cat requires
        # text_to_ids() to return tensors; plain Python lists would raise a
        # TypeError — verify against the actual tokenizer contract.
        if isinstance(text, str):
            text_ids = self.tokenizer.text_to_ids(text)
            return text_ids, len(text_ids)
        elif isinstance(text, list):
            text_ids = [self.tokenizer.text_to_ids(t) for t in text]
            return torch.cat(text_ids), sum(len(t) for t in text_ids)
        else:
            raise ValueError(
                f"text must be a string or a list of strings, got {type(text)}"
            )

    def __getitem__(self, idx: int) -> DatumSpec:
        """Return a single prompt.

        In the multi-task case the (spec, processor) pair is looked up by the
        entry's "task_name"; otherwise the default spec and the single
        processor are used. The processor callable performs the actual
        tokenization/formatting.
        """
        entry = self.dataset[idx]

        if isinstance(self.task_data_processors, dict):
            task_name = entry["task_name"]

            assert task_name in self.task_data_processors, (
                f"task processor not provided for {task_name}. Provided processors: {self.task_data_processors.keys()}"
            )
            task_data_spec, task_data_processor = self.task_data_processors[task_name]
        else:
            task_data_spec = self.default_task_data_spec
            task_data_processor = self.task_data_processors

        datum_spec = task_data_processor(
            entry, task_data_spec, self.tokenizer, self.max_seq_length, idx
        )

        # Check the first processed item for BOS token assertion
        # (runs at most once per dataset instance, on the first item whose
        # first message carries token_ids).
        if (
            not self._bos_checked
            and "message_log" in datum_spec
            and datum_spec["message_log"]
        ):
            first_message = datum_spec["message_log"][0]
            if "token_ids" in first_message:
                token_ids = first_message["token_ids"]
                assert isinstance(token_ids, torch.Tensor), (
                    f"token_ids must be a torch.Tensor, got {type(token_ids)}"
                )
                assert_no_double_bos(token_ids, self.tokenizer)
                self._bos_checked = True

        return datum_spec

# --- nemo_rl/data/datasets/response_datasets/__init__.py (new file; standard Apache-2.0 header) ---
from typing import Any

from nemo_rl.data.datasets.response_datasets.clevr import CLEVRCoGenTDataset
from nemo_rl.data.datasets.response_datasets.dapo_math import DAPOMath17KDataset
from nemo_rl.data.datasets.response_datasets.deepscaler import DeepScalerDataset
from nemo_rl.data.datasets.response_datasets.geometry3k import Geometry3KDataset
from nemo_rl.data.datasets.response_datasets.oai_format_dataset import (
    OpenAIFormatDataset,
)
from nemo_rl.data.datasets.response_datasets.oasst import OasstDataset
from nemo_rl.data.datasets.response_datasets.openmathinstruct2 import (
    OpenMathInstruct2Dataset,
)
from nemo_rl.data.datasets.response_datasets.refcoco import RefCOCODataset
from nemo_rl.data.datasets.response_datasets.response_dataset import ResponseDataset
from nemo_rl.data.datasets.response_datasets.squad import SquadDataset
from nemo_rl.data.datasets.utils import get_extra_kwargs


def load_response_dataset(data_config: dict[str, Any], seed: int = 42):
    """Loads response dataset.

    Dispatches on data_config["dataset_name"]: SFT datasets
    (open_assistant, squad, openmathinstruct2, clevr_cogent, openai_format),
    RL datasets (OpenMathInstruct-2, DeepScaler, DAPOMath17K), VLM RL
    datasets (clevr-cogent, refcoco, geometry3k), or the generic
    ResponseDataset loaded from a local JSONL file or HuggingFace.

    Args:
        data_config: Dataset configuration; which keys are required depends
            on the selected dataset_name.
        seed: Seed forwarded to datasets that shuffle or split.

    Raises:
        ValueError: For an unknown dataset_name, or when ResponseDataset is
            requested without train_data_path.
    """
    dataset_name = data_config["dataset_name"]

    # TODO @yukih: remove duplicated dataset_name (openmathinstruct2, clevr_cogent)
    # for sft training
    if dataset_name == "open_assistant":
        base_dataset: Any = OasstDataset(
            output_dir="/tmp/open_assistant",
            seed=seed,
        )
    elif dataset_name == "squad":
        base_dataset = SquadDataset()
    elif dataset_name == "openmathinstruct2":
        base_dataset = OpenMathInstruct2Dataset(
            split=data_config["split"],
            output_key=data_config["output_key"],
            prompt_file=data_config["prompt_file"],
            seed=seed,
        )
    elif dataset_name == "clevr_cogent":
        base_dataset = CLEVRCoGenTDataset(
            split=data_config["split"],
            prompt_file=data_config["prompt_file"],
        )
    elif dataset_name == "openai_format":
        # Optional keys fall back to the OpenAIFormatDataset defaults
        # instead of raising KeyError when absent from the config.
        base_dataset = OpenAIFormatDataset(
            data_config["train_data_path"],
            data_config["val_data_path"],
            data_config.get("chat_key", "messages"),
            data_config.get("system_key"),
            data_config.get("system_prompt"),
            data_config.get("tool_key", "tools"),
            data_config.get("use_preserving_dataset", False),
        )
    # for rl training
    elif dataset_name == "OpenMathInstruct-2":
        print("Loading nvidia/OpenMathInstruct2Dataset for training and validation")
        base_dataset = OpenMathInstruct2Dataset(seed=seed)
    elif dataset_name == "DeepScaler":
        print(
            "Loading agentica-org/DeepScaleR-Preview-Dataset for training and validation"
        )
        base_dataset = DeepScalerDataset(seed=seed)
    elif dataset_name == "DAPOMath17K":
        print(
            "Loading BytedTsinghua-SIA/DAPO-Math-17k for training and AIME 2024 for validation"
        )
        base_dataset = DAPOMath17KDataset(seed=seed)
    # for vlm rl training
    elif dataset_name == "clevr-cogent":
        base_dataset = CLEVRCoGenTDataset(
            split=data_config["split"],
        )
    elif dataset_name == "refcoco":
        base_dataset = RefCOCODataset(
            split=data_config["split"],
            download_dir=data_config["download_dir"],
        )
    elif dataset_name == "geometry3k":
        base_dataset = Geometry3KDataset(
            split=data_config["split"],
        )
    # fall back to load from JSON file
    elif dataset_name == "ResponseDataset":
        if "train_data_path" not in data_config:
            raise ValueError(
                "train_data_path is required when dataset_name is not one of the built-ins."
            )
        extra_kwargs = get_extra_kwargs(
            data_config,
            [
                "val_data_path",
                "input_key",
                "output_key",
                "train_split",
                "val_split",
            ],
        )
        base_dataset = ResponseDataset(
            train_data_path=data_config["train_data_path"],
            **extra_kwargs,
        )
    else:
        raise ValueError(
            f"Unsupported {dataset_name=}. "
            "Please either use a built-in dataset "
            "or set dataset_name=ResponseDataset to load from local JSONL file or HuggingFace."
        )

    return base_dataset


__all__ = [
    "CLEVRCoGenTDataset",
    "DeepScalerDataset",
    "DAPOMath17KDataset",
    "Geometry3KDataset",
    "OpenAIFormatDataset",
    "OasstDataset",
    "OpenMathInstruct2Dataset",
    "RefCOCODataset",
    "ResponseDataset",
    "SquadDataset",
]

# --- nemo_rl/data/datasets/response_datasets/clevr.py (new file; standard Apache-2.0 header) ---
import re
from typing import Any, Optional


def format_answer_fromtags(answer: str) -> str:
    """Extract content between <answer> tags and strip whitespace.

    Falls back to the stripped input when no tag pair is found.
    NOTE(review): without re.DOTALL, answers containing newlines between the
    tags will not match — confirm whether that case occurs in the data.
    """
    pattern = r"<answer>(.*?)</answer>"
    match = re.search(pattern, answer)
    ret = match.group(1).strip() if match else answer.strip()
    return ret


def format_clevr_cogent_dataset(
    example: dict[str, Any], return_pil: bool = False
) -> dict[str, Any]:
    """Format the CLEVR-CoGenT dataset into an OpenAI-API-like message log.

    Args:
        example: Raw sample with "image", "problem" and "solution" keys.
        return_pil: If True, keep the raw image object; otherwise embed it
            as base64.
    """
    if return_pil:
        image_payload = example["image"]
    else:
        # Lazy import: only needed (and only required to be importable)
        # when the base64 conversion is actually performed.
        from nemo_rl.data.datasets.utils import pil_to_base64

        image_payload = pil_to_base64(example["image"])

    user_content = [
        {
            "type": "image",
            "image": image_payload,
        },
        {
            "type": "text",
            "text": str(example["problem"]),
        },
    ]

    # The reference answer is the text between the <answer> tags.
    assistant_content = format_answer_fromtags(str(example["solution"]))

    ret = {
        "messages": [
            {"role": "user", "content": user_content},
            {
                "role": "assistant",
                "content": assistant_content,
            },
        ],
        "task_name": "clevr-cogent",
    }
    return ret


# contain different variants of the CLEVR dataset
def prepare_clevr_cogent_dataset(
    split: str = "trainA", task_name: Optional[str] = None
):
    """Build {"train", "validation"} splits for the CLEVR-CoGenT protocol.

    Args:
        split: One of "trainA", "trainB", "valA", "valB".
        task_name: Value stored in the added "task_name" column; defaults
            to "clevr-cogent".

    Raises:
        ValueError: For an unrecognized split (previously this fell through
            to a confusing NameError).
    """
    if task_name is None:
        task_name = "clevr-cogent"

    if split not in ("trainA", "trainB", "valA", "valB"):
        raise ValueError(
            f"Invalid split: {split}. Please use 'trainA', 'trainB', 'valA', or 'valB'."
        )

    from datasets import load_dataset  # lazy: heavy dependency

    if split == "trainA":
        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_TrainA_70K_Complex")[
            "train"
        ]
        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValA")["train"]
    elif split == "trainB":
        # NOTE(review): trainB also trains on the TrainA data (CoGenT
        # generalization protocol: train on condition A, validate on B) —
        # confirm this is intentional.
        tr_dataset = load_dataset("MMInstruction/Clevr_CoGenT_TrainA_70K_Complex")[
            "train"
        ]
        val_dataset = load_dataset("MMInstruction/Clevr_CoGenT_ValB")["train"]
    elif split == "valA":
        ds = load_dataset("MMInstruction/Clevr_CoGenT_ValA")["train"]
        tr_dataset = ds
        val_dataset = ds
    else:  # "valB"
        ds = load_dataset("MMInstruction/Clevr_CoGenT_ValB")["train"]
        tr_dataset = ds
        val_dataset = ds

    # format - disable features to avoid schema conflicts
    tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset))
    val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset))

    return {
        "train": tr_dataset,
        "validation": val_dataset,
    }


class CLEVRCoGenTDataset:
    def __init__(
        self,
        split: str = "trainA",
        prompt_file: Optional[str] = None,
    ):
        """Simple wrapper around the CLEVR-CoGenT dataset.

        Args:
            split: The split of the dataset to use.
            prompt_file: The file containing the prompt for the dataset.

        Raises:
            ValueError: If split is not one of trainA/trainB/valA/valB.
        """
        if split not in ["trainA", "trainB", "valA", "valB"]:
            raise ValueError(
                f"Invalid split: {split}. Please use 'trainA', 'trainB', 'valA', or 'valB'."
            )
        self.task_name = "clevr-cogent"

        self.formatted_ds = prepare_clevr_cogent_dataset(
            split=split, task_name=self.task_name
        )
        from nemo_rl.data.interfaces import TaskDataSpec  # lazy project import

        self.task_spec = TaskDataSpec(
            task_name="CLEVR",
            prompt_file=prompt_file,
        )

# --- nemo_rl/data/datasets/response_datasets/dapo_math.py (new file; standard Apache-2.0 header) ---
+ + +from typing import Any + +from datasets import Dataset, load_dataset + +from nemo_rl.data.interfaces import TaskDataSpec + + +def format_dapo_math_17k( + data: dict[str, str | float | int], +) -> dict[str, list[Any] | str]: + return { + "messages": [ + { + "role": "user", + "content": data["prompt"][0]["content"], + }, + { + "role": "assistant", + "content": data["reward_model"]["ground_truth"], + }, + ], + "task_name": "math", + } + + +def prepare_dapo_math_17k_dataset(seed: int = 42) -> dict[str, Dataset | None]: + """Load and split the DeepScaler dataset into train and test sets.""" + # Load the original dataset for training + train_ds = load_dataset("BytedTsinghua-SIA/DAPO-Math-17k", split="train") + + # Load hendrydong/aime24 dataset for validation + val_ds = load_dataset("BytedTsinghua-SIA/AIME-2024", split="train") + + # Shuffle the training dataset with the specified seed + train_ds = train_ds.shuffle(seed=seed) + + # Format the examples, removing original columns + train_formatted = train_ds.map( + format_dapo_math_17k, remove_columns=train_ds.column_names + ) + val_formatted = val_ds.map(format_dapo_math_17k, remove_columns=val_ds.column_names) + + return { + "train": train_formatted, + "validation": val_formatted, + } + + +class DAPOMath17KDataset: + def __init__(self, seed: int = 42) -> None: + """Initialize the DAPO Math 17K dataset with train split. 
+ + Args: + seed: Random seed for reproducible splitting + """ + self.formatted_ds = prepare_dapo_math_17k_dataset(seed=seed) + + self.task_spec = TaskDataSpec( + task_name="DAPOMath17K", + ) diff --git a/nemo_rl/data/hf_datasets/deepscaler.py b/nemo_rl/data/datasets/response_datasets/deepscaler.py similarity index 100% rename from nemo_rl/data/hf_datasets/deepscaler.py rename to nemo_rl/data/datasets/response_datasets/deepscaler.py diff --git a/nemo_rl/data/datasets/response_datasets/geometry3k.py b/nemo_rl/data/datasets/response_datasets/geometry3k.py new file mode 100644 index 0000000000..deaac1e8e7 --- /dev/null +++ b/nemo_rl/data/datasets/response_datasets/geometry3k.py @@ -0,0 +1,101 @@ +## Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from typing import Any, Optional + +from datasets import load_dataset + +from nemo_rl.data.datasets.utils import pil_to_base64 +from nemo_rl.data.interfaces import TaskDataSpec + + +def format_geometry3k_dataset( + example: dict[str, Any], return_pil: bool = False +) -> dict[str, Any]: + """Format the Geometry3K dataset into an OpenAI-API-like message log.""" + # isolate single image + example["image"] = ( + example["images"][0] + if isinstance(example["images"], list) + else example["images"] + ) + + user_content = [ + { + "type": "image", + "image": pil_to_base64(example["image"]) + if not return_pil + else example["image"], + }, + { + "type": "text", + "text": str(example["problem"]).replace("<image>", ""), + }, + ] + + assistant_content = str(example["answer"]) + + ret = { + "messages": [ + {"role": "user", "content": user_content}, + { + "role": "assistant", + "content": assistant_content, + }, + ], + "task_name": "geometry3k", + } + return ret + + +def prepare_geometry3k_dataset(split: str = "train", task_name: str = "geometry3k"): + if split == "train": + tr_dataset = load_dataset("hiyouga/geometry3k")["train"] + val_dataset = load_dataset("hiyouga/geometry3k")["validation"] + else: + tr_dataset = load_dataset("hiyouga/geometry3k")[split] + val_dataset = load_dataset("hiyouga/geometry3k")[split] + + # format - disable features to avoid schema conflicts + tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset)) + val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset)) + return { + "train": tr_dataset, + "validation": val_dataset, + } + + +class Geometry3KDataset: + def __init__( + self, + split: str = "train", + prompt_file: Optional[str] = None, + ): + """Simple wrapper around the Geometry3K dataset. + + Args: + split: The split of the dataset to use. + prompt_file: The file containing the prompt for the dataset. + """ + assert split in ["train", "validation", "test"], ( + f"Invalid split: {split}. 
Please use 'train' or 'validation' or 'test'." + ) + self.task_name = "geometry3k" + + self.formatted_ds = prepare_geometry3k_dataset( + split=split, task_name=self.task_name + ) + self.task_spec = TaskDataSpec( + task_name="Geometry3K", + prompt_file=prompt_file, + ) diff --git a/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py b/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py new file mode 100644 index 0000000000..8a903a7259 --- /dev/null +++ b/nemo_rl/data/datasets/response_datasets/oai_format_dataset.py @@ -0,0 +1,221 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import warnings +from typing import Any, Callable, Union + +from datasets import load_dataset + +from nemo_rl.data.interfaces import TaskDataSpec + + +class PreservingDataset: + """A dataset wrapper that preserves original dict structure without None-filling. + + Unlike HuggingFace's Dataset class which enforces schema uniformity across all samples + (filling missing keys with None), this class maintains the exact structure of each sample. + This is critical for heterogeneous data like tool calls where different samples may have + different argument structures. + """ + + def __init__(self, data: list[dict[str, Any]]): + """Initialize the dataset with a list of dictionaries. 
+ + Args: + data: List of dictionary samples, each can have different keys + """ + self.data = data + self.features = None # For compatibility with HF Dataset interface + + def __len__(self) -> int: + return len(self.data) + + def __getitem__( + self, idx: Union[int, slice, list] + ) -> Union[dict[str, Any], list[dict[str, Any]]]: + """Support integer indexing, slicing, and list indexing.""" + if isinstance(idx, slice): + return [self.data[i] for i in range(*idx.indices(len(self.data)))] + elif isinstance(idx, int): + # Handle negative indices + if idx < 0: + idx = len(self.data) + idx + if idx < 0 or idx >= len(self.data): + raise IndexError( + f"Index {idx} out of range for dataset of size {len(self.data)}" + ) + return self.data[idx] + elif isinstance(idx, list): + return [self.data[i] for i in idx] + else: + raise TypeError( + f"Indices must be integers, slices, or lists, not {type(idx)}" + ) + + def __iter__(self): + return iter(self.data) + + def map(self, function: Callable, *args, **kwargs) -> "PreservingDataset": + """Apply a function to each sample in the dataset. + + Args: + function: Function to apply to each sample + with_indices: If True, pass index as second argument to function + + Returns: + New PreservingDataset with transformed samples + """ + if kwargs.get("with_indices", False): + mapped_data = [function(item, i) for i, item in enumerate(self.data)] + else: + mapped_data = [function(item) for item in self.data] + return PreservingDataset(mapped_data) + + +class OpenAIFormatDataset: + """This class is used to load an SFT dataset in the OpenAI format. 
+ + The dataset should be in the following format: + { + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + {"role": "assistant", "content": "The capital of France is Paris."} + ] + } + + Args: + train_ds_path: Path to the training dataset JSON file + val_ds_path: Path to the validation dataset JSON file + chat_key: Key for the messages list in the dataset (default: "messages") + system_key: Optional key for system prompt in the dataset + system_prompt: Optional system prompt to add if not in the dataset + tool_key: Key for tools in the dataset (default: "tools") + use_preserving_dataset: If True, uses PreservingDataset to maintain + heterogeneous schemas (e.g., for tool calls with varying argument + structures). If False, uses standard HuggingFace dataset loading. + Default is False for backward compatibility. + + Notes: + - system_key and system_prompt are optional. If provided, it will be added + to the beginning of the dataset. + - chat_key should be the key of the messages list. Multi-turn conversations + are supported. + - The last message in the conversation must be from the assistant. + - When use_preserving_dataset=True, the dataset preserves the exact structure + of each sample without None-filling for missing keys, which is useful for + heterogeneous tool argument schemas. 
+ """ + + def __init__( + self, + train_ds_path: str, + val_ds_path: str, + chat_key: str = "messages", + system_key: str | None = None, + system_prompt: str | None = None, + tool_key: str | None = "tools", + use_preserving_dataset: bool = False, + ): + self.chat_key = chat_key + self.system_key = system_key + self.system_prompt = system_prompt + self.tool_key = tool_key + + if not use_preserving_dataset: + # Use the standard HuggingFace approach (faster and more standard) + train_original_dataset = load_dataset("json", data_files=train_ds_path)[ + "train" + ] + val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] + + formatted_train_dataset = train_original_dataset.map(self.add_messages_key) + formatted_val_dataset = val_original_dataset.map(self.add_messages_key) + + print( + f"Loaded dataset using standard approach (train: {len(formatted_train_dataset)}, val: {len(formatted_val_dataset)})" + ) + + # Warn if tools are present in the dataset + if self.tool_key and any( + self.tool_key in sample for sample in formatted_train_dataset + ): + warnings.warn( + "Tools detected in dataset. Set use_preserving_dataset=True to preserve heterogeneous tool schemas. " + "Current mode may add None values for missing tool arguments, making samples invalid.", + UserWarning, + stacklevel=2, + ) + + else: + # Use custom loading for heterogeneous schemas + # Issue: When tool calls have varying argument structures across samples, + # HuggingFace's Dataset.from_list enforces uniform schema by adding None + # values for missing keys. Example: + # Sample 1: {"tools": [{"name": "search", "args": {"query": "x"}}]} + # Sample 2: {"tools": [{"name": "calc", "args": {"expr": "y", "precision": 2}}]} + # Standard loading would add "precision: None" to Sample 1 and "query: None" to Sample 2. + # PreservingDataset maintains exact structure without None-filling. + print( + "Using PreservingDataset to preserve heterogeneous tool argument schemas without None-filling." 
+ ) + + # Load JSON files directly + with open(train_ds_path, "r") as f: + train_data = [json.loads(line) for line in f] + + with open(val_ds_path, "r") as f: + val_data = [json.loads(line) for line in f] + + # Apply transformations + formatted_train_data = [self.add_messages_key(item) for item in train_data] + formatted_val_data = [self.add_messages_key(item) for item in val_data] + + # Use PreservingDataset to maintain exact structure + formatted_train_dataset = PreservingDataset(formatted_train_data) + formatted_val_dataset = PreservingDataset(formatted_val_data) + + print( + f"Loaded dataset using PreservingDataset (train: {len(formatted_train_dataset)}, val: {len(formatted_val_dataset)})" + ) + + self.formatted_ds = { + "train": formatted_train_dataset, + "validation": formatted_val_dataset, + } + + self.task_spec = TaskDataSpec( + "json_dataset", + ) + + def add_messages_key( + self, + example: dict[str, Any], + ) -> dict[str, list[dict[str, Any]]]: + messages = [message for message in example[self.chat_key]] + if self.system_key is not None and self.system_key in example: + messages = [ + {"role": "system", "content": example[self.system_key]} + ] + messages + elif self.system_prompt: + messages = [{"role": "system", "content": self.system_prompt}] + messages + assert messages[-1]["role"] == "assistant" + + # Preserve tools if they exist in the data + result = {"messages": messages} + if self.tool_key and self.tool_key in example: + result["tools"] = example[self.tool_key] + + return result diff --git a/nemo_rl/data/hf_datasets/oasst.py b/nemo_rl/data/datasets/response_datasets/oasst.py similarity index 98% rename from nemo_rl/data/hf_datasets/oasst.py rename to nemo_rl/data/datasets/response_datasets/oasst.py index a0c19b6909..3ba044e452 100644 --- a/nemo_rl/data/hf_datasets/oasst.py +++ b/nemo_rl/data/datasets/response_datasets/oasst.py @@ -123,8 +123,8 @@ def download_and_process_oasst( class OasstDataset: - def __init__(self, output_dir: str = ".") -> 
None: - self.formatted_ds = download_and_process_oasst(output_dir) + def __init__(self, output_dir: str = ".", seed: int = 42) -> None: + self.formatted_ds = download_and_process_oasst(output_dir, seed) self.task_spec = TaskDataSpec( task_name="OASST", ) diff --git a/nemo_rl/data/hf_datasets/openmathinstruct2.py b/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py similarity index 100% rename from nemo_rl/data/hf_datasets/openmathinstruct2.py rename to nemo_rl/data/datasets/response_datasets/openmathinstruct2.py diff --git a/nemo_rl/data/datasets/response_datasets/refcoco.py b/nemo_rl/data/datasets/response_datasets/refcoco.py new file mode 100644 index 0000000000..87d5d7372f --- /dev/null +++ b/nemo_rl/data/datasets/response_datasets/refcoco.py @@ -0,0 +1,262 @@ +## Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
import random
import zipfile
from pathlib import Path
from typing import Any, Optional, Union

import requests
from datasets import load_dataset
from PIL import Image
from tqdm import tqdm  # Using tqdm for progress bar, install with: pip install tqdm

from nemo_rl.data.datasets.utils import pil_to_base64
from nemo_rl.data.interfaces import TaskDataSpec


def download_and_unzip(url: str, target_directory: str, subdir_name: str = "."):
    """Downloads a zip file from a given URL to a target directory and unzips it into a specified subdirectory within the target directory, showing download progress.

    Args:
        url (str): The URL of the zip file to download.
        target_directory (str): The directory where the zip file will be downloaded
            and unzipped.
        subdir_name (str): The name of the subdirectory within the target_directory
            where the contents of the zip file will be unzipped.
            Defaults to "." (the docstring previously claimed "train").
    """
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
        print(f"Created target directory: {target_directory}")

    # Extract filename from URL
    filename = url.split("/")[-1]
    filepath = os.path.join(target_directory, filename)

    # Download the file with progress (skipped when the zip already exists).
    if not os.path.exists(filepath):
        # Previously printed a literal "(unknown)" placeholder here.
        print(f"Downloading {filename} from {url} to {filepath}...")
        try:
            with requests.get(url, stream=True) as r:
                r.raise_for_status()
                total_size_in_bytes = int(r.headers.get("content-length", 0))
                block_size = 8192  # 8 Kibibytes

                # Initialize tqdm progress bar
                progress_bar = tqdm(
                    total=total_size_in_bytes, unit="iB", unit_scale=True
                )

                with open(filepath, "wb") as f:
                    for chunk in r.iter_content(chunk_size=block_size):
                        progress_bar.update(len(chunk))
                        f.write(chunk)
                progress_bar.close()  # Close the progress bar

            print(f"Download complete: {filepath}")
        except requests.exceptions.RequestException as e:
            # Chain the original exception so the root cause is preserved.
            raise requests.exceptions.RequestException(
                f"Error downloading file: {e}"
            ) from e
    else:
        print(f"File {filepath} already exists, skipping download.")

    # Define the unzipping directory
    unzip_dir = os.path.join(target_directory, subdir_name)
    if not os.path.exists(unzip_dir):
        os.makedirs(unzip_dir)
        print(f"Created unzip directory: {unzip_dir}")

    # Unzip the file
    print(f"Unzipping {filepath} to {unzip_dir}...")
    try:
        with zipfile.ZipFile(filepath, "r") as zip_ref:
            # zipfile does not expose extractall progress; for very large
            # archives, extract member-by-member if progress is needed.
            zip_ref.extractall(unzip_dir)
        print("Unzipping complete.")
    except zipfile.BadZipFile:
        raise zipfile.BadZipFile(f"Error: {filepath} is not a valid zip file.")
    except Exception as e:
        # Chain the original exception so the traceback is preserved.
        raise Exception(f"Error unzipping file: {e}") from e


def format_refcoco_dataset(
    example: dict[str, Any],
    width: int = 256,
    height: int = 256,
    caption_type: str = "random",
    prompt_file: Optional[str] = None,
) -> dict[str, Any]:
    """Format the RefCOCO dataset from huggingface.

    This should be replaced with our own curated RefCOCO/+/g dataset soon

    Args:
        example: The example to format.
        width: The width of the resized image.
        height: The height of the resized image.
        caption_type: One of "random", "first", "descriptive" (longest
            caption), "brief" (shortest caption) or "all" (captions joined
            with " or "). Validation splits always use "descriptive".
        prompt_file: Unused here; kept for interface symmetry with the other
            dataset formatters.
    """
    split = example["split"]
    if "val" in split:
        caption_type = "descriptive"

    # resize image for easy image processing across batches
    image = Image.open(example["image_path"])
    orig_width, orig_height = image.size
    resized_image = image.resize((width, height))

    # get caption from many types
    if caption_type == "random":
        caption = random.choice(example["captions"])
    elif caption_type == "first":
        caption = example["captions"][0]
    elif caption_type == "descriptive":  # choose the most descriptive caption
        caption = max(example["captions"], key=lambda x: len(x))
    elif caption_type == "brief":  # choose the briefest caption
        caption = min(example["captions"], key=lambda x: len(x))
    elif caption_type == "all":
        caption = " or ".join(example["captions"])
    else:
        raise ValueError(f"Invalid caption type: {caption_type}")

    # Normalize the box to a 0-1000 coordinate grid
    # (top-left x, top-left y, bottom-right x, bottom-right y).
    bbox = example["bbox"]
    bbox = [
        bbox[0] / orig_width * 1000,
        bbox[1] / orig_height * 1000,
        bbox[2] / orig_width * 1000,
        bbox[3] / orig_height * 1000,
    ]
    bbox = [int(round(coord)) for coord in bbox]
    solution = f"[{bbox[0]}, {bbox[1]}, {bbox[2]}, {bbox[3]}]"

    user_content = [
        {
            "type": "image",
            "image": pil_to_base64(resized_image),
        },
        {
            "type": "text",
            "text": f"Please provide the bounding box coordinate of the region described by the following phrase: {caption}",
        },
    ]

    ret = {
        "messages": [
            {"role": "user", "content": user_content},
            {
                "role": "assistant",
                "content": solution,
            },
        ],
        "task_name": "refcoco",
    }
    return ret


# contains different variants of the RefCOCO dataset
# (comment previously said "CLEVR" — copy-paste error)
def prepare_refcoco_dataset(
    split: str = "default",
    task_name: Optional[str] = None,
    path_to_coco_images: Optional[Union[str, Path]] = None,
):
    if task_name is None:
        task_name = "refcoco"

    tr_dataset = load_dataset("jxu124/refcoco")["train"]
    val_dataset = load_dataset("jxu124/refcoco")["validation"]

    # format
- disable features to avoid schema conflicts + tr_dataset = tr_dataset.add_column("task_name", [task_name] * len(tr_dataset)) + val_dataset = val_dataset.add_column("task_name", [task_name] * len(val_dataset)) + + if path_to_coco_images is None: + print("No path to coco images provided, downloading images to ./coco_images") + path_to_coco_images = Path("./coco_images") + os.makedirs(path_to_coco_images, exist_ok=True) + else: + path_to_coco_images = Path(path_to_coco_images) + + # check for images + if not os.path.exists(str(path_to_coco_images / "train2014")): + print(f"Downloading train2014 images to {path_to_coco_images}") + download_and_unzip( + "http://images.cocodataset.org/zips/train2014.zip", str(path_to_coco_images) + ) + if not os.path.exists(str(path_to_coco_images / "val2014")): + print(f"Downloading val2014 images to {path_to_coco_images}") + download_and_unzip( + "http://images.cocodataset.org/zips/val2014.zip", str(path_to_coco_images) + ) + + # add image column + tr_dataset = tr_dataset.map( + lambda example: { + **example, + "image_path": str(example["image_path"]).replace( + "coco/", str(path_to_coco_images) + "/" + ) + if "image_path" in example + else example.get("image_path"), + } + ) + val_dataset = val_dataset.map( + lambda example: { + **example, + "image_path": str(example["image_path"]).replace( + "coco/", str(path_to_coco_images) + "/" + ) + if "image_path" in example + else example.get("image_path"), + } + ) + + return { + "train": tr_dataset, + "validation": val_dataset, + } + + +class RefCOCODataset: + def __init__( + self, + split: str = "default", + prompt_file: Optional[str] = None, + download_dir: Optional[str] = None, + ): + """Simple wrapper around the RefCOCO dataset. + + Args: + split: The split of the dataset to use (currently only 'default' is supported) + prompt_file: The file containing the prompt for the dataset. 
+ """ + VALID_SPLITS = ["default"] + if split not in VALID_SPLITS: + raise ValueError( + f"Invalid split: {split}. Please use one of {VALID_SPLITS}." + ) + self.task_name = "refcoco" + + self.formatted_ds = prepare_refcoco_dataset( + split=split, + task_name=self.task_name, + path_to_coco_images=download_dir, + ) + self.task_spec = TaskDataSpec( + task_name="RefCOCO", + prompt_file=prompt_file, + ) diff --git a/nemo_rl/data/datasets/response_datasets/response_dataset.py b/nemo_rl/data/datasets/response_datasets/response_dataset.py new file mode 100644 index 0000000000..6dbd7752b8 --- /dev/null +++ b/nemo_rl/data/datasets/response_datasets/response_dataset.py @@ -0,0 +1,80 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Optional + +from nemo_rl.data.datasets.utils import load_dataset_from_path +from nemo_rl.data.interfaces import TaskDataSpec + + +class ResponseDataset: + """Dataset class for response data which can be loaded from a JSON file. + + This class handles loading of response data for SFT and RL training. 
+ The input JSONL files should contain valid JSON objects formatted like this: + { + input_key: str, # The input prompt/context + output_key: str, # The output response/answer + } + + Args: + train_data_path: Path to the JSON file containing training data + val_data_path: Path to the JSON file containing validation data + input_key: Key for the input text + output_key: Key for the output text + train_split: Split name for the training data, used for HuggingFace datasets, default is None + val_split: Split name for the validation data, used for HuggingFace datasets, default is None + """ + + def __init__( + self, + train_data_path: str, + val_data_path: Optional[str] = None, + input_key: str = "input", + output_key: str = "output", + train_split: Optional[str] = None, + val_split: Optional[str] = None, + ): + self.input_key = input_key + self.output_key = output_key + + # load from json file or huggingface + train_ds = load_dataset_from_path(train_data_path, train_split) + if val_data_path: + val_ds = load_dataset_from_path(val_data_path, val_split) + else: + val_ds = None + + # format the dataset + train_ds = train_ds.map(self.add_messages_key) + if val_ds: + val_ds = val_ds.map(self.add_messages_key) + + # store the formatted dataset + self.formatted_ds = { + "train": train_ds, + "validation": val_ds, + } + + self.task_spec = TaskDataSpec(task_name="ResponseDataset") + + def add_messages_key( + self, example: dict[str, Any] + ) -> dict[str, list[dict[str, Any]]]: + return { + "messages": [ + {"role": "user", "content": example[self.input_key]}, + {"role": "assistant", "content": example[self.output_key]}, + ] + } diff --git a/nemo_rl/data/hf_datasets/squad.py b/nemo_rl/data/datasets/response_datasets/squad.py similarity index 100% rename from nemo_rl/data/hf_datasets/squad.py rename to nemo_rl/data/datasets/response_datasets/squad.py diff --git a/nemo_rl/data/datasets/utils.py b/nemo_rl/data/datasets/utils.py new file mode 100644 index 0000000000..12c65ef14f --- 
/dev/null +++ b/nemo_rl/data/datasets/utils.py @@ -0,0 +1,102 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import base64 +import io +import os +from typing import Optional, Union + +import torch +from datasets import DatasetDict, load_dataset +from PIL import Image +from transformers import AutoProcessor, PreTrainedTokenizerBase + +TokenizerType = Union[PreTrainedTokenizerBase, AutoProcessor] + + +def assert_no_double_bos(token_ids: torch.Tensor, tokenizer: TokenizerType) -> None: + """Assert that there are no double starting BOS tokens in the message. + + Args: + token_ids: List of token IDs + tokenizer: Tokenizer + """ + if tokenizer.bos_token_id is not None: + token_ids_list = token_ids.tolist() + if len(token_ids_list) > 1: + assert not ( + token_ids_list[0] == tokenizer.bos_token_id + and token_ids_list[1] == tokenizer.bos_token_id + ), "Found double BOS token in the first two positions of the message." + else: + # `name_or_path` is not available for AutoProcessor, temp fix in get_tokenizer + print( + f"skip assert_start_single_bos since Tokenizer {tokenizer.name_or_path} has no BOS token" + ) + + +def pil_to_base64(image: Image.Image, format: str = "PNG") -> str: + """Converts a PIL Image object to a base64 encoded string. + + Args: + image: The PIL Image object to convert. + format: The image format (e.g., "PNG", "JPEG"). Defaults to "PNG". 
+ + Returns: + A base64 encoded string representation of the image. + """ + buffered = io.BytesIO() + image.save(buffered, format=format) + img_str = base64.b64encode(buffered.getvalue()).decode("utf-8") + return f"data:image/png;base64,{img_str}" + + +def load_dataset_from_path(data_path: str, data_split: Optional[str] = "train"): + """Load a dataset from a json or huggingface dataset. + + Args: + data_path: The path to the dataset. + data_split: The split to load from the dataset. + """ + suffix = os.path.splitext(data_path)[-1] + if suffix in [".json", ".jsonl"]: + raw_dataset = load_dataset("json", data_files=data_path) + else: + raw_dataset = load_dataset(data_path) + + if data_split: + raw_dataset = raw_dataset[data_split] + # if the dataset doesn't contain split, load_dataset will use "train" as default + elif isinstance(raw_dataset, DatasetDict) and "train" in raw_dataset: + raw_dataset = raw_dataset["train"] + + return raw_dataset + + +def get_extra_kwargs(data_config: dict, keys: list[str]) -> dict: + """Get extra kwargs from the data config. + + If the key is not in the data config, it will be ignored. + + Args: + data_config: The data config. + keys: The keys to get from the data config. + + Returns: + The extra kwargs. + """ + extra_kwargs = {} + for key in keys: + if key in data_config: + extra_kwargs[key] = data_config[key] + return extra_kwargs diff --git a/nemo_rl/data/hf_datasets/__init__.py b/nemo_rl/data/hf_datasets/__init__.py deleted file mode 100644 index aa5596397c..0000000000 --- a/nemo_rl/data/hf_datasets/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_rl.data.hf_datasets.dpo import DPODataset -from nemo_rl.data.hf_datasets.helpsteer3 import HelpSteer3Dataset -from nemo_rl.data.hf_datasets.oai_format_dataset import OpenAIFormatDataset -from nemo_rl.data.hf_datasets.oasst import OasstDataset -from nemo_rl.data.hf_datasets.openmathinstruct2 import OpenMathInstruct2Dataset -from nemo_rl.data.hf_datasets.prompt_response_dataset import ( - PromptResponseDataset, -) -from nemo_rl.data.hf_datasets.squad import SquadDataset - -__all__ = [ - "DPODataset", - "HelpSteer3Dataset", - "OasstDataset", - "OpenAIFormatDataset", - "OpenMathInstruct2Dataset", - "PromptResponseDataset", - "SquadDataset", - "COMMON_CHAT_TEMPLATES", -] diff --git a/nemo_rl/data/hf_datasets/dpo.py b/nemo_rl/data/hf_datasets/dpo.py deleted file mode 100644 index 03d5c7e872..0000000000 --- a/nemo_rl/data/hf_datasets/dpo.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from datasets import load_dataset - -from nemo_rl.data.interfaces import TaskDataSpec - - -class DPODataset: - """Dataset class for Direct Preference Optimization (DPO) training. - - This class handles loading of preference data for DPO training. - The input JSON files should contain examples with the following structure: - { - "prompt": str, # The input prompt/context - "chosen_response": str, # The preferred/winning response - "rejected_response": str # The non-preferred/losing response - } - - Args: - train_data_path (str): Path to the JSON file containing training data - val_data_path (str): Path to the JSON file containing validation data - - """ - - def __init__(self, train_data_path: str, val_data_path: str): - self.formatted_ds = { - "train": load_dataset("json", data_files=train_data_path, split="train"), - "validation": load_dataset("json", data_files=val_data_path, split="train"), - } - - self.task_spec = TaskDataSpec( - task_name="DPO", - ) diff --git a/nemo_rl/data/hf_datasets/oai_format_dataset.py b/nemo_rl/data/hf_datasets/oai_format_dataset.py deleted file mode 100644 index 3326ef3d9e..0000000000 --- a/nemo_rl/data/hf_datasets/oai_format_dataset.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import Any - -from datasets import load_dataset - -from nemo_rl.data.interfaces import TaskDataSpec - - -class OpenAIFormatDataset: - """This class is used to load an SFT dataset in the OpenAI format. - - The dataset should be in the following format: - { - "messages": [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is the capital of France?"}, - {"role": "assistant", "content": "The capital of France is Paris."} - ] - } - system_key and system_prompt are optional. If provided, it will be added to the - beginning of the dataset. - chat_key should be the key of the messages list. Multi-turn conversations are - supported. - The last message in the conversation must be from the assistant. - """ - - def __init__( - self, - train_ds_path: str, - val_ds_path: str, - chat_key: str = "messages", - system_key: str | None = None, - system_prompt: str | None = None, - ): - self.chat_key = chat_key - self.system_key = system_key - self.system_prompt = system_prompt - train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] - val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] - - formatted_train_dataset = train_original_dataset.map(self.add_messages_key) - formatted_val_dataset = val_original_dataset.map(self.add_messages_key) - - self.formatted_ds = { - "train": formatted_train_dataset, - "validation": formatted_val_dataset, - } - - self.task_spec = TaskDataSpec( - "json_dataset", - ) - - def add_messages_key( - self, - example: dict[str, Any], - ) -> dict[str, list[dict[str, Any]]]: - messages = [message for message in example[self.chat_key]] - if self.system_key is not None and self.system_key in example: - messages = [ - {"role": "system", "content": example[self.system_key]} - ] + messages - elif self.system_prompt: - messages = [{"role": "system", "content": self.system_prompt}] + messages - assert messages[-1]["role"] == "assistant" - return 
{"messages": messages} diff --git a/nemo_rl/data/hf_datasets/prompt_response_dataset.py b/nemo_rl/data/hf_datasets/prompt_response_dataset.py deleted file mode 100644 index 398459d2b2..0000000000 --- a/nemo_rl/data/hf_datasets/prompt_response_dataset.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any - -from datasets import load_dataset - -from nemo_rl.data.interfaces import TaskDataSpec - - -class PromptResponseDataset: - def __init__( - self, - train_ds_path: str, - val_ds_path: str, - input_key: str = "input", - output_key: str = "output", - ): - train_original_dataset = load_dataset("json", data_files=train_ds_path)["train"] - val_original_dataset = load_dataset("json", data_files=val_ds_path)["train"] - - self.input_key = input_key - self.output_key = output_key - - formatted_train_dataset = train_original_dataset.map(self.add_messages_key) - formatted_val_dataset = val_original_dataset.map(self.add_messages_key) - - self.formatted_ds = { - "train": formatted_train_dataset, - "validation": formatted_val_dataset, - } - - self.task_spec = TaskDataSpec( - "json_dataset", - ) - - def add_messages_key( - self, example: dict[str, Any] - ) -> dict[str, list[dict[str, Any]]]: - return { - "messages": [ - {"role": "user", "content": example[self.input_key]}, - {"role": "assistant", "content": example[self.output_key]}, - ] - } diff --git 
a/nemo_rl/data/llm_message_utils.py b/nemo_rl/data/llm_message_utils.py index c9563f1afd..c0572ce3a1 100644 --- a/nemo_rl/data/llm_message_utils.py +++ b/nemo_rl/data/llm_message_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import warnings -from typing import Any, Optional, cast +from typing import Any, Optional, Union, cast import torch from datasets import Dataset @@ -23,6 +23,10 @@ LLMMessageLogType, TaskDataSpec, ) +from nemo_rl.data.multimodal_utils import ( + PackedTensor, + get_multimodal_keys_from_processor, +) from nemo_rl.distributed.batched_data_dict import BatchedDataDict Tensor = torch.Tensor @@ -60,6 +64,19 @@ def message_log_to_flat_messages( ['Hello', 'Hi there'] >>> flat_msgs['token_ids'] tensor([1, 2, 3, 4, 5, 6, 7]) + >>> + >>> # Multimodal example: + >>> from nemo_rl.data.multimodal_utils import PackedTensor + >>> img1 = torch.randn(2, 3, 4, 4) + >>> img2 = torch.randn(3, 3, 4, 4) + >>> mm_log = [ + ... {'role': 'user', 'content': 'see', 'token_ids': torch.tensor([1]), 'images': PackedTensor(img1, dim_to_pack=0)}, + ... {'role': 'assistant', 'content': 'ok', 'token_ids': torch.tensor([2, 3]), 'images': PackedTensor(img2, dim_to_pack=0)}, + ... 
] + >>> flat_mm = message_log_to_flat_messages(mm_log) + >>> tuple(flat_mm['images'].as_tensor().shape) + (5, 3, 4, 4) + >>> ``` """ result: dict[str, list[Any]] = {} @@ -94,6 +111,14 @@ def message_log_to_flat_messages( f"tensors for {key=} must have same number of dimensions: {[t.shape for t in result[key]]}" ) from e raise + elif result[key] and isinstance(result[key][0], PackedTensor): + try: + concat[key] = PackedTensor.concat(result[key]) + except Exception as e: + raise RuntimeError( + f"Error concatenating packed multimodal data for {key=}" + ) from e + output: FlatMessagesType = {**result, **concat} return output @@ -264,6 +289,26 @@ def batched_message_log_to_flat_message( >>> input_lengths tensor([7, 9], dtype=torch.int32) >>> + >>> # Multimodal example: include images on both conversations and verify packing + >>> from nemo_rl.data.multimodal_utils import PackedTensor + >>> mm_batch = [ + ... [ + ... {'role': 'user', 'content': 'look', 'token_ids': torch.tensor([1, 2, 3]), 'images': PackedTensor(torch.randn(2, 3, 4, 4), dim_to_pack=0)}, + ... {'role': 'assistant', 'content': 'ok', 'token_ids': torch.tensor([4])} + ... ], + ... [ + ... {'role': 'user', 'content': 'again', 'token_ids': torch.tensor([5, 6]), 'images': PackedTensor(torch.randn(1, 3, 4, 4), dim_to_pack=0)}, + ... {'role': 'assistant', 'content': 'fine', 'token_ids': torch.tensor([7, 8])} + ... ] + ... 
] + >>> mm_flat, mm_lengths = batched_message_log_to_flat_message(mm_batch, pad_value_dict={'token_ids': 0}) + >>> isinstance(mm_flat['images'], PackedTensor) + True + >>> tuple(mm_flat['images'].as_tensor().shape) # 2 + 1 images + (3, 3, 4, 4) + >>> mm_lengths + tensor([4, 4], dtype=torch.int32) + >>> ``` """ if not message_log_batch: @@ -276,6 +321,7 @@ def batched_message_log_to_flat_message( # Find max length and identify tensor keys max_len = 0 tensor_keys = [] + multimodal_keys = [] for seq in sequenced_lists: for key, value in seq.items(): if isinstance(value, Tensor): @@ -313,6 +359,10 @@ def batched_message_log_to_flat_message( result = BatchedDataDict() for key in all_keys: values = [seq.get(key) for seq in sequenced_lists] + # if the values are PackedTensors, create a new PackedTensor from the list of values + if values and isinstance(values[0], PackedTensor): + result[key] = PackedTensor.flattened_concat(values) + continue if not values or not isinstance(values[0], Tensor): result[key] = values continue @@ -372,6 +422,24 @@ def get_first_index_that_differs(str1: str, str2: str) -> int: return min(len(str1), len(str2)) +def get_images_from_message(message: dict[str, Any]) -> list[Any]: + """Get all images from a message log item.""" + # Handle None or missing content (e.g., assistant messages with only tool_calls) + if message.get("content") is None: + return [] + # Handle string content (no images) + if isinstance(message["content"], str): + return [] + # iterate over the content list + images = [] + for item in message["content"]: + if item["type"] == "image": + images.extend(list(item["image"])) if isinstance( + item["image"], (list, tuple) + ) else images.append(item["image"]) + return images + + def get_formatted_message_log( message_log: LLMMessageLogType, tokenizer: TokenizerType, @@ -379,6 +447,7 @@ def get_formatted_message_log( add_bos_token: bool = True, add_eos_token: bool = True, add_generation_prompt: bool = False, + tools: 
Optional[list[dict[str, Any]]] = None, ) -> LLMMessageLogType: """Format and tokenize chat messages using the specified template. @@ -389,7 +458,7 @@ def get_formatted_message_log( add_bos_token: Whether to add bos token to first message if it is not already present. Default: True add_eos_token: Whether to add eos token to last message if it is not already present. Default: True add_generation_prompt: Whether to include assistant's generation prompt in user messages. Default: False - + tools: Optional list of tool/function definitions to pass to the chat template. Default: None Returns: The message log with updated 'token_ids' and 'content' fields. """ @@ -399,22 +468,78 @@ def get_formatted_message_log( list[dict[str, str]], message_log ) # we just use the str:str parts here + multimodal_keys = get_multimodal_keys_from_processor(tokenizer) + + def _format_content_helper( + content: Union[str, list[dict[str, Any]]], + ) -> Union[str, list[dict[str, Any]]]: + """This function formats the text portion of the first user message with the task prompt. + + The `content` argument could either be a string (user text prompt) or a dict (user text prompt + multimodal data). + + Examples of `content` argument include strings or dicts from the following conversation turns: + - {"role": "user", "content": "What is the capital of France?"} + - {"role": "user", "content": [{"type": "text", "text": "What is the capital of the city in the image?"}, {"type": "image", "image": "path/to/image.jpg"}]} + - {"role": "user", "content": [{"type": "text", "text": "Does the animal in the image match the sound it makes in the audio?"}, {"type": "image", "image": "path/to/image.jpg"}, {"type": "audio", "audio": "path/to/audio.mp3"}]} + + In all cases, the text portion of the message is formatted with the task prompt. + + Previously, the `content` argument was modified using + >>> message_log_strs = [ + ... { + ... "role": "user", + ... 
"content": task_data_spec.prompt.format(message_log_strs[0]["content"]), + ... } + ... ] + message_log_strs[1:] + >>> + + which assumes that the first message is a string (not true for multimodal data). This helper function correctly handles all cases. + """ + if isinstance(content, str): + return task_data_spec.prompt.format(content) + # this is a list of dicts, format only the text ones + for item in content: + if item["type"] == "text": + item["text"] = task_data_spec.prompt.format(item["text"]) + return content + + # ignore any system prompts + first_user_msg_id = 0 + for i, msg in enumerate(message_log_strs): + if msg["role"] == "user": + first_user_msg_id = i + break + if task_data_spec.prompt: - message_log_strs = [ - { - "role": "user", - "content": task_data_spec.prompt.format(message_log_strs[0]["content"]), - } - ] + message_log_strs[1:] + message_log_strs = ( + message_log_strs[:first_user_msg_id] + + [ + { + "role": "user", + "content": _format_content_helper( + message_log_strs[first_user_msg_id]["content"] + ), + } + ] + + message_log_strs[first_user_msg_id + 1 :] + ) for i, message in enumerate(message_log_strs): # If enabled, add_generation_prompt is only used on user messages to include # the assistant's generation prompt as part of the user message. 
+ + # Only pass tools parameter if tools exist + template_kwargs = { + "add_generation_prompt": add_generation_prompt + and message["role"] in ["user", "tool"], + "tokenize": False, + "add_special_tokens": False, + } + if tools is not None: + template_kwargs["tools"] = tools + formatted_message: str = tokenizer.apply_chat_template( # type: ignore - message_log_strs[: i + 1], - add_generation_prompt=add_generation_prompt and message["role"] == "user", - tokenize=False, - add_special_tokens=False, + message_log_strs[: i + 1], **template_kwargs ) ## get the length of the previous message, excluding the eos token (if present) @@ -426,6 +551,31 @@ def get_formatted_message_log( ## pull out the chunk corresponding to the current message message_chunk = formatted_message[prev_message_len_no_eos:] + # Debug: Print each message turn separately (only once for the first sample) + if not hasattr(get_formatted_message_log, "_debug_printed"): + if i == 0: + # Print header only at the start of first message + print("\n" + "=" * 80) + print("DEBUG: Individual message turns from apply_chat_template") + print("=" * 80) + + print(f"\n[Turn {i + 1}/{len(message_log_strs)}] Role: {message['role']}") + print("-" * 40) + print("Extracted message chunk:") + print(repr(message_chunk)) # Using repr to show special characters + print(f"Raw text (len={len(message_chunk)}):") + print(message_chunk) + print("-" * 40) + + if i == len(message_log_strs) - 1: + # Mark as printed after processing all turns of the first sample + get_formatted_message_log._debug_printed = True + print("\n" + "=" * 80) + print("DEBUG: Complete formatted conversation:") + print("-" * 80) + print(formatted_message) + print("=" * 80 + "\n") + if i == 0: if add_bos_token: if tokenizer.bos_token is None: @@ -436,28 +586,74 @@ def get_formatted_message_log( message_chunk = tokenizer.bos_token + message_chunk if i == len(message_log_strs) - 1: - message_chunk = message_chunk.rstrip("\n") + r""" + This is an attempt to 
robustly append the eos token. The origin is Qwen + chat templates always append <eos>\n and some models like gemma do not + use the <eos> at all in the chat template. Adding a <eos> if the <eos> is + already at the end, is likely a user error, and since we know Qwen likes to + have <eos>\n we'll check for that case. + + This makes the logic slightly more robust to the model family's chat template + so users don't need to know whether they need to add add_eos or not. + """ + stripped_message_chunk = message_chunk.rstrip("\n") if add_eos_token: if tokenizer.eos_token is None: warnings.warn( "add_eos_token is True but the tokenizer does not have an EOS token. Skipping EOS token addition." ) - elif not message_chunk.endswith(tokenizer.eos_token): + elif not stripped_message_chunk.endswith(tokenizer.eos_token): message_chunk += tokenizer.eos_token + # get images too (extend this for other modalities) + images_cur_message = get_images_from_message(message) + new_message = message.copy() - new_message["token_ids"] = tokenizer( - message_chunk, return_tensors="pt", add_special_tokens=False - )["input_ids"][0] + # extend this if statement to check for all(len(modality)) == 0 when adding other modalities + if len(images_cur_message) == 0: + new_message["token_ids"] = tokenizer( + text=message_chunk, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] + else: + # extend the else statement to add other modalities (in this case, tokenizer will be a processor) + processed_chunk = tokenizer( + text=[message_chunk], + images=images_cur_message, + return_tensors="pt", + add_special_tokens=False, + ) + new_message["token_ids"] = processed_chunk["input_ids"][0] + + # add all vlm keys to the message + for key in multimodal_keys: + if key in processed_chunk: + new_message[key] = PackedTensor(processed_chunk[key], dim_to_pack=0) + if len(new_message["token_ids"]) == 0: # if there is an empty message, the empty `token_ids` tensor ends up being in fp32, # which causes 
`_validate_tensor_consistency` to fail. To fix this, we convert the # empty tensor to int64. new_message["token_ids"] = new_message["token_ids"].to(torch.int64) # type: ignore - new_message["content"] = message_chunk - new_message_log.append(new_message) + # format content correctly + content = message.get("content") + if content is None or not content: + # Handle None or missing content (e.g., assistant messages with only tool_calls) + new_message["content"] = message_chunk + elif isinstance(content, str): + new_message["content"] = message_chunk + else: + # format the content list of new message the same way as the original message but replace the text with the new message chunk + new_message["content"] = [] + for item in content: + if item["type"] == "text": + new_message["content"].append( + {"type": "text", "text": message_chunk} + ) + else: + new_message["content"].append(item) + new_message_log.append(new_message) prev_formatted_message = formatted_message return new_message_log diff --git a/nemo_rl/data/multimodal_utils.py b/nemo_rl/data/multimodal_utils.py new file mode 100644 index 0000000000..0da507acc7 --- /dev/null +++ b/nemo_rl/data/multimodal_utils.py @@ -0,0 +1,181 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Optional, Union + +import torch +from transformers import PreTrainedTokenizerBase + + +class PackedTensor: + """Wrapper around a list of torch tensors and a dimension along which to pack the tensors. + + This class is used to wrap a list of tensors along with a `dim_to_pack` parameter. + It can be used for data that can be packed along different dimensions (such as multimodal data). + + `dim_to_pack` is used to specify the dimension along which to pack the tensors. + + The list of tensors can be returned as a single packed tensor by calling `as_tensor` which will concatenate the tensors along the `dim_to_pack` dimension. + """ + + def __init__( + self, + tensors: Union[torch.Tensor, list[Optional[torch.Tensor]], list[None]], + dim_to_pack: int, + ) -> None: + assert tensors is not None, "Input tensors to PackedTensor cannot be None" + + if isinstance(tensors, torch.Tensor): + self.tensors: list[Optional[torch.Tensor]] = [tensors] + elif isinstance(tensors, list): + assert len(tensors) > 0, ( + "Input tensors to PackedTensor must be a non-empty list" + ) + self.tensors: list[Optional[torch.Tensor]] = tensors + else: + raise ValueError( + f"Unsupported type for input tensors to PackedTensor: {type(tensors)}" + ) + self.dim_to_pack = dim_to_pack + + def as_tensor( + self, device: Optional[torch.device] = None + ) -> Optional[torch.Tensor]: + if device is not None: + # Move only non-None tensors to device, preserve Nones + for i, item in enumerate(self.tensors): + if item is not None: + self.tensors[i] = item.to(device) + non_none_tensors = [t for t in self.tensors if t is not None] + if len(non_none_tensors) == 0: + return None + else: + return torch.cat(non_none_tensors, dim=self.dim_to_pack).to(device) + + def __len__(self) -> int: + # this is the number of tensors in this data wrapper + return len(self.tensors) + + def to(self, device: str | torch.device) -> "PackedTensor": + self.tensors = [ + item.to(device) if item is not None else None for 
item in self.tensors + ] + return self + + def slice(self, indices: Union[list[int], torch.Tensor]) -> "PackedTensor": + idx = indices.tolist() if isinstance(indices, torch.Tensor) else indices + tensors = [self.tensors[i] for i in idx] + return PackedTensor(tensors, self.dim_to_pack) + + @classmethod + def empty_like(cls, other: "PackedTensor") -> "PackedTensor": + """Return a new PackedTensor with same length and dim_to_pack as `other`, with all entries None.""" + return cls([None] * len(other.tensors), other.dim_to_pack) + + @classmethod + def concat(cls, from_packed_tensors: list["PackedTensor"]) -> "PackedTensor": + """Concatenate a list of PackedTensor objects into a single PackedTensor. + + The underlying tensors from the PackedTensors are combined into a single list of tensors and used to create a new PackedTensor. + + Each batch must have the same dim_to_pack. + + Example: + ```{doctest} + >>> import torch + >>> from nemo_rl.data.multimodal_utils import PackedTensor + >>> p1 = PackedTensor([torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])], dim_to_pack=0) + >>> p2 = PackedTensor([torch.tensor([7, 8, 9])], dim_to_pack=0) + >>> p3 = PackedTensor.concat([p1, p2]) + >>> p3.tensors + [tensor([1, 2, 3]), tensor([4, 5, 6]), tensor([7, 8, 9])] + >>> p3.as_tensor() + tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> + ``` + """ + dim_to_packs = [batch.dim_to_pack for batch in from_packed_tensors] + assert len(set(dim_to_packs)) == 1, ( + "All packed tensors must have the same dim_to_pack" + ) + # concatenate the tensors + tensors = [] + for packed_tensor in from_packed_tensors: + tensors.extend(packed_tensor.tensors) + dim_to_pack = dim_to_packs[0] + return cls(tensors, dim_to_pack) + + @classmethod + def flattened_concat( + cls, from_packed_tensors: list["PackedTensor"] + ) -> "PackedTensor": + """Given a list of PackedTensor objects, flattens each PackedTensor and then concatenates them into a single PackedTensor. 
+ + Each PackedTensor is first flattened by packing along the PackedTensor's `dim_to_pack` dimension. Then, the resulting flattened tensors are used to create a new PackedTensor. + + This is different from `PackedTensor.concat` which simply extends the underlying list of tensors. This is important because the `slice` and `__len__` methods operate on the underlying list of tensors. Note, however, that calling `as_tensor` on the resulting PackedTensor will result in the same tensor as `concat`. + + Each batch must have the same dim_to_pack. + + Example: + ```{doctest} + >>> import torch + >>> from nemo_rl.data.multimodal_utils import PackedTensor + >>> p1 = PackedTensor([torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])], dim_to_pack=0) + >>> p2 = PackedTensor([torch.tensor([7, 8, 9])], dim_to_pack=0) + >>> p3 = PackedTensor.flattened_concat([p1, p2]) + >>> p3.tensors + [tensor([1, 2, 3, 4, 5, 6]), tensor([7, 8, 9])] + >>> p3.as_tensor() + tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]) + >>> + ``` + """ + dim_to_packs = [batch.dim_to_pack for batch in from_packed_tensors] + assert len(set(dim_to_packs)) == 1, ( + "All packed tensors must have the same dim_to_pack" + ) + tensors = [p.as_tensor() for p in from_packed_tensors] + return cls(tensors, from_packed_tensors[0].dim_to_pack) + + +def get_multimodal_keys_from_processor(processor) -> list[str]: + """Get keys of the multimodal data that can be used as model inputs. + + This will be used in the data_processor function to determine which keys to use as model inputs. 
+ """ + if isinstance(processor, PreTrainedTokenizerBase): + return [] + + all_keys = set() + if hasattr(processor, "image_processor"): + all_keys.update(processor.image_processor.model_input_names) + if hasattr(processor, "video_processor"): + all_keys.update(processor.video_processor.model_input_names) + if hasattr(processor, "feature_extractor"): + all_keys.update(processor.feature_extractor.model_input_names) + # all_keys.update(processor.model_input_names) + all_keys.difference_update(set(processor.tokenizer.model_input_names)) + return list(all_keys) + + +def get_dim_to_pack_along(processor, key: str) -> int: + """Special considerations for packing certain keys from certain processors. + + In most cases, the packed items are along dim 0 + """ + if processor.__class__.__name__ == "SmolVLMProcessor": + return 1 + # return zero by default + return 0 diff --git a/nemo_rl/data/processors.py b/nemo_rl/data/processors.py index 67e3658882..3a90f384fe 100644 --- a/nemo_rl/data/processors.py +++ b/nemo_rl/data/processors.py @@ -51,7 +51,9 @@ def math_data_processor( add_generation_prompt=False, add_special_tokens=False, ) - sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + sys_prompt["token_ids"] = tokenizer( + sys, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] message_log.append(sys_prompt) # user prompt @@ -64,7 +66,9 @@ def math_data_processor( add_generation_prompt=True, add_special_tokens=False, ) - user_message["token_ids"] = tokenizer(message, return_tensors="pt")["input_ids"][0] + user_message["token_ids"] = tokenizer( + message, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] user_message["content"] = message message_log.append(user_message) @@ -91,6 +95,63 @@ def math_data_processor( return output +def math_hf_data_processor( + datum_dict: dict[str, Any], + task_data_spec: TaskDataSpec, + tokenizer: TokenizerType, + max_seq_length: int, + idx: int, +) -> DatumSpec: + """Process a datum 
dictionary (directly loaded from data/hf_datasets/openmathinstruct2.py) into a DatumSpec for the Reward Model Environment.""" + user_message = datum_dict["messages"] + problem = user_message[0]["content"] + extra_env_info = {"ground_truth": user_message[1]["content"]} + + message_log: LLMMessageLogType = [] + formatted_content = ( + task_data_spec.prompt.format(problem) if task_data_spec.prompt else problem + ) + user_message = { + "role": "user", + "content": formatted_content, + } + message: list[str] = tokenizer.apply_chat_template( # type: ignore + [user_message], + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + ) + + user_message["token_ids"] = tokenizer( + message, + return_tensors="pt", + add_special_tokens=False, + )["input_ids"][0] + user_message["content"] = message + message_log.append(user_message) + + length = sum(len(m["token_ids"]) for m in message_log) + + loss_multiplier = 1.0 + if length > max_seq_length: + # make smaller and mask out + for chat_message in message_log: + chat_message["token_ids"] = chat_message["token_ids"][ + : min(4, max_seq_length // len(message_log)) + ] + loss_multiplier = 0.0 + + output: DatumSpec = { + "message_log": message_log, + "length": length, + "extra_env_info": extra_env_info, + "loss_multiplier": loss_multiplier, + "idx": idx, + "task_name": datum_dict["task_name"], + } + return output + + def _construct_multichoice_prompt( prompt: str, question: str, options: dict[str, str] ) -> str: @@ -136,7 +197,9 @@ def multichoice_qa_processor( add_generation_prompt=False, add_special_tokens=False, ) - sys_prompt["token_ids"] = tokenizer(sys, return_tensors="pt")["input_ids"][0] + sys_prompt["token_ids"] = tokenizer( + sys, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] message_log.append(sys_prompt) # user prompt @@ -151,7 +214,9 @@ def multichoice_qa_processor( add_generation_prompt=True, add_special_tokens=False, ) - user_message["token_ids"] = tokenizer(message, 
return_tensors="pt")["input_ids"][0] + user_message["token_ids"] = tokenizer( + message, return_tensors="pt", add_special_tokens=False + )["input_ids"][0] user_message["content"] = message message_log.append(user_message) diff --git a/nemo_rl/distributed/batched_data_dict.py b/nemo_rl/distributed/batched_data_dict.py index dc75b39cf0..20b39f2b50 100644 --- a/nemo_rl/distributed/batched_data_dict.py +++ b/nemo_rl/distributed/batched_data_dict.py @@ -29,6 +29,9 @@ import torch from typing_extensions import Self +from nemo_rl.data.multimodal_utils import ( + PackedTensor, +) from nemo_rl.data.packing import get_packer from nemo_rl.distributed.collectives import ( gather_jagged_object_lists, @@ -70,6 +73,11 @@ class DynamicBatchingArgs(TypedDict): class BatchedDataDict(UserDict, Generic[DictT]): + # keys that are model specific, but not part of the PackedTensor + ADDITIONAL_OPTIONAL_KEY_TENSORS = [ + "token_type_ids", # specific to gemma3 that tells where the image tokens are in the sequence, not required for llm-only inference/training + ] + def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -77,6 +85,19 @@ def __init__(self, *args, **kwargs): self.micro_batch_lengths = None self.elem_counts_per_gb = None + def get_multimodal_dict( + self, as_tensors: bool = False, device: Optional[torch.device] = None + ) -> dict[str, Any]: + """Return a regular dict of tensors or packed multimodal data items.""" + multimodal_dict = {} + for k, v in self.data.items(): + if isinstance(v, PackedTensor): + multimodal_dict[k] = v.as_tensor(device=device) if as_tensors else v + elif k in self.ADDITIONAL_OPTIONAL_KEY_TENSORS: + multimodal_dict[k] = v + + return multimodal_dict + @classmethod def from_batches( cls: Type[Self], @@ -104,18 +125,41 @@ def from_batches( tensor_or_list: list[Any] | torch.Tensor = [ item for sublist in list_of_tensors for item in sublist ] + elif isinstance(list_of_tensors[0], PackedTensor): + tensor_or_list = 
PackedTensor.concat(list_of_tensors) elif all(x.ndim == 1 for x in list_of_tensors): tensor_or_list = torch.cat(list_of_tensors) elif isinstance(list_of_tensors[0], torch.Tensor): pad_value = pad_value_dict.get(k, 0) - - list_of_tensors = [ - row.flatten() for tensor in list_of_tensors for row in tensor - ] - # TODO: can we avoid padding locally then padding globally? - tensor_or_list = torch.nn.utils.rnn.pad_sequence( - list_of_tensors, batch_first=True, padding_value=pad_value - ) + # We now add the following if statement to handle the 3D case in distillation + # (i.e., teacher top-k logits and indices); the else branch is the original code. + if list_of_tensors[0].ndim == 3: + # For 3D tensors, pad only along the sequence dimension (the 1st dimension here), + # keeping the feature dimension. + max_seq_len = max(tensor.shape[1] for tensor in list_of_tensors) + padded_tensors = [] + for tensor in list_of_tensors: + # Pad along the 1st dimension to max_seq_len. + pad_length = max_seq_len - tensor.shape[1] + padded = torch.nn.functional.pad( + tensor, + # Only pad the last two dimensions (sequence length). + (0, 0, 0, pad_length), + mode="constant", + value=pad_value, + ) + padded_tensors.append(padded) + tensor_or_list = torch.cat( + padded_tensors, dim=0 + ) # concatenate along the batch dimension + else: + list_of_tensors = [ + row.flatten() for tensor in list_of_tensors for row in tensor + ] + # TODO: can we avoid padding locally then padding globally? 
+ tensor_or_list = torch.nn.utils.rnn.pad_sequence( + list_of_tensors, batch_first=True, padding_value=pad_value + ) else: raise NotImplementedError( ( @@ -183,6 +227,8 @@ def chunk(self, rank: int, chunks: int) -> "SlicedDataDict": for k in self.data: if torch.is_tensor(self.data[k]): chunked_batch[k] = self.data[k][indices].clone() + elif isinstance(self.data[k], PackedTensor): + chunked_batch[k] = self.data[k].slice(indices) else: chunked_batch[k] = [self.data[k][i] for i in indices] @@ -212,6 +258,8 @@ def reorder_data(self, reorded_indices: list[int]): sorted_v = v.index_select( dim=0, index=torch.IntTensor(reordered_indices) ) + elif isinstance(v, PackedTensor): + sorted_v = v.slice(reordered_indices) else: sorted_v = [v[i] for i in reordered_indices] self.data[k] = sorted_v @@ -315,7 +363,8 @@ def shard_by_batch_size( batch_sizes.add(len(val)) assert len(batch_sizes) == 1, ( - "Batch sizes are not the same across the rollout batch" + "Batch sizes are not the same across the rollout batch, found sizes: " + + f"[{','.join(str(size) for size in batch_sizes)}]" ) total_batch_size = batch_sizes.pop() if batch_size is None: @@ -365,11 +414,13 @@ def shard_by_batch_size( # finally reorder the data along the sorted sequence len indices for k, v in self.data.items(): - sorted_v: torch.Tensor | list[Any] + sorted_v: torch.Tensor | list[Any] | PackedTensor if torch.is_tensor(v): sorted_v = v.index_select( dim=0, index=torch.IntTensor(batch_sorted_indices) ) + elif isinstance(v, PackedTensor): + sorted_v = v.slice(batch_sorted_indices) else: sorted_v = [v[i] for i in batch_sorted_indices] data[k] = sorted_v @@ -505,6 +556,10 @@ def _get_padded_seqlen(seqlen: int) -> int: # First time seeing this key for this shard, initialize it if torch.is_tensor(data[k]): aggregated_shards[shard_idx][k] = data[k][indices].clone() + elif isinstance(data[k], PackedTensor): + aggregated_shards[shard_idx][k] = data[k].slice( + indices.tolist() + ) else: aggregated_shards[shard_idx][k] = [ 
data[k][i] for i in indices @@ -518,6 +573,13 @@ def _get_padded_seqlen(seqlen: int) -> int: data[k][indices].clone(), ] ) + elif isinstance(data[k], PackedTensor): + aggregated_shards[shard_idx][k] = PackedTensor.concat( + [ + aggregated_shards[shard_idx][k], + data[k].slice(indices.tolist()), + ] + ) else: aggregated_shards[shard_idx][k].extend( [data[k][i] for i in indices] @@ -648,6 +710,10 @@ def slice(self, start: int, end: int) -> "SlicedDataDict": """ sliced_batch = SlicedDataDict() for k in self.data: + if isinstance(self.data[k], PackedTensor): + sliced_batch[k] = self.data[k].slice(list(range(start, end))) + continue + if isinstance(self.data[k], torch.Tensor): assert end <= self.data[k].shape[0], ( f"end: {end} is greater than the shape of the tensor: {self.data[k].shape[0]} for key: {k}" @@ -667,6 +733,10 @@ def repeat_interleave(self, num_repeats: int) -> Self: if torch.is_tensor(v): # For tensors, use repeat_interleave to repeat each element repeated_batch[k] = v.repeat_interleave(num_repeats, dim=0) + elif isinstance(v, PackedTensor): + raise NotImplementedError( + "PackedTensor does not currently support repeat_interleave" + ) else: # For lists or other sequences, use a list comprehension to repeat each element repeated_batch[k] = [ @@ -757,6 +827,8 @@ def to(self, device: str | torch.device) -> Self: for k, v in self.data.items(): if torch.is_tensor(v): self.data[k] = v.to(device) + elif isinstance(v, PackedTensor): + self.data[k] = v.to(device) return self def select_indices(self, indices: Union[list[int], torch.Tensor]) -> Self: @@ -772,6 +844,8 @@ def select_indices(self, indices: Union[list[int], torch.Tensor]) -> Self: for k, v in self.data.items(): if torch.is_tensor(v): selected_batch[k] = v[indices] + elif isinstance(v, PackedTensor): + selected_batch[k] = v.slice(indices) elif isinstance(v, list): selected_batch[k] = [v[i] for i in indices] else: diff --git a/nemo_rl/distributed/model_utils.py b/nemo_rl/distributed/model_utils.py index 
5b6a2d57f2..fb17ee1661 100644 --- a/nemo_rl/distributed/model_utils.py +++ b/nemo_rl/distributed/model_utils.py @@ -77,11 +77,10 @@ def forward( # pyrefly: ignore[bad-override] Always ignore torch.autograd.Func masked_target = target - vocab_start_index masked_target[target_mask] = 0 - log_softmax_output = _compute_distributed_log_softmax( - vocab_parallel_logits, group=group - ) - log_probs = log_softmax_output.clone() - softmax_output = log_softmax_output.exp_() + vocab_parallel_logits = vocab_parallel_logits.to(dtype=torch.float32) + + log_probs = _compute_distributed_log_softmax(vocab_parallel_logits, group=group) + softmax_output = log_probs.exp() log_probs = torch.gather(log_probs, -1, masked_target.unsqueeze(-1)).squeeze(-1) log_probs[target_mask] = 0.0 @@ -141,6 +140,247 @@ def backward( return grad_input, None, None, None, None, None, None +class ChunkedDistributedLogprob(torch.autograd.Function): + """Custom autograd function for computing log probabilities in a distributed setting. + + The log probabilities computation is chunked in the sequence dimension + to mitigate GPU OOM (especially during backward pass). + In addition, logits casting from float16 or bfloat16 -> float32 is performed + inside the chunk loop to avoid materializing a whole float32 logits tensor. + + Adapted from https://github.com/NVIDIA/NeMo-Aligner/blob/9faab404f21994a7eb1d6ed5890b76152b941636/nemo_aligner/utils/distributed.py#L286 + """ + + @staticmethod + def forward( # pyrefly: ignore[bad-override] Always ignore torch.autograd.Function.forward's type since it's always more specific than the base class + ctx: Any, + vocab_parallel_logits: torch.Tensor, + target: torch.Tensor, + vocab_start_index: int, + vocab_end_index: int, + chunk_size: int, + tp_group: torch.distributed.ProcessGroup, + inference_only: bool = False, + ) -> torch.Tensor: + # Create a mask of valid vocab ids (1 means it needs to be masked). 
+ target_mask = (target < vocab_start_index) | (target >= vocab_end_index) + masked_target = target - vocab_start_index + masked_target[target_mask] = 0 + + seq_size = int(vocab_parallel_logits.shape[1]) + num_chunks = (seq_size + chunk_size - 1) // chunk_size + all_log_probs = [] + + for chunk_idx in range(num_chunks): + chunk_start = chunk_idx * chunk_size + chunk_end = min(seq_size, (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, chunk_start:chunk_end, :] + logits = logits.to(dtype=torch.float32) + + log_probs = _compute_distributed_log_softmax( + logits, + group=tp_group, + ) + + log_probs = torch.gather( + log_probs, -1, masked_target[:, chunk_start:chunk_end].unsqueeze(-1) + ).squeeze(-1) + log_probs[target_mask[:, chunk_start:chunk_end]] = 0.0 + + torch.distributed.all_reduce( + log_probs, + op=torch.distributed.ReduceOp.SUM, + group=tp_group, + ) + + all_log_probs.append(log_probs) + + log_probs = torch.cat(all_log_probs, dim=1) + + if not inference_only: + # only save for backward when we have inference only=False + ctx.save_for_backward(vocab_parallel_logits, target_mask, masked_target) + ctx.chunk_size = chunk_size + ctx.tp_group = tp_group + + return log_probs + + @staticmethod + def backward( + ctx: Any, + *grad_outputs: torch.Tensor, + ) -> tuple[torch.Tensor, None, None, None, None, None, None]: + grad_output = grad_outputs[0] + vocab_parallel_logits, target_mask, masked_target = ctx.saved_tensors + chunk_size = ctx.chunk_size + tp_group = ctx.tp_group + + partition_vocab_size = int(vocab_parallel_logits.shape[-1]) + seq_size = int(vocab_parallel_logits.shape[1]) + num_chunks = (seq_size + chunk_size - 1) // chunk_size + + all_grad_input = [] + + for chunk_idx in range(num_chunks): + chunk_start = chunk_idx * chunk_size + chunk_end = min(seq_size, (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, chunk_start:chunk_end, :] + logits = logits.to(dtype=torch.float32) + + softmax_output = 
_compute_distributed_log_softmax( + logits, + group=tp_group, + ) + softmax_output = softmax_output.exp() + + # 1 if it's the chosen log prob, 0 otherwise + is_chosen = (~(target_mask[:, chunk_start:chunk_end])).unsqueeze( + -1 + ) * torch.nn.functional.one_hot( + masked_target[:, chunk_start:chunk_end], + num_classes=partition_vocab_size, + ) + + grad_input = is_chosen.float().sub_(softmax_output) + + grad_input.mul_(grad_output[:, chunk_start:chunk_end].unsqueeze(dim=-1)) + + all_grad_input.append(grad_input) + + grad_input = torch.cat(all_grad_input, dim=1) + + # if you add an argument to the forward method, then you must add a corresponding None here + return grad_input, None, None, None, None, None, None + + +class ChunkedDistributedGatherLogprob(torch.autograd.Function): + """Compute distributed log-softmax once and gather logprobs at given global indices. + + Forward computes per-chunk distributed log-softmax across TP, gathers selected + log probabilities at the provided global indices (shape [B, S, K]), and returns + a tensor of shape [B, S, K]. + + Backward recomputes per-chunk softmax from logits and applies the gradient rule: + dL/dz = -softmax * sum_k(dL/dy_k) + scatter_add(dL/dy_k) over selected indices. 
+ """ + + @staticmethod + def forward( # pyrefly: ignore[bad-override] + ctx: Any, + vocab_parallel_logits: torch.Tensor, # [B, S, V_local] + global_indices: torch.Tensor, # [B, S, K] + vocab_start_index: int, + vocab_end_index: int, + chunk_size: int, + tp_group: torch.distributed.ProcessGroup, + inference_only: bool = False, + ) -> torch.Tensor: + B, S, V_local = vocab_parallel_logits.shape + num_chunks = (int(S) + chunk_size - 1) // chunk_size + out_chunks: list[torch.Tensor] = [] + + for chunk_idx in range(num_chunks): + s0 = chunk_idx * chunk_size + s1 = min(int(S), (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, s0:s1, :].to(dtype=torch.float32) + # distributed log softmax along full vocab + log_probs = _compute_distributed_log_softmax(logits, group=tp_group) + + gi = global_indices[:, s0:s1, :] + in_range = (gi >= int(vocab_start_index)) & (gi < int(vocab_end_index)) + li = (gi - int(vocab_start_index)).clamp(min=0, max=V_local - 1) + + local_vals = torch.gather(log_probs, dim=-1, index=li) + local_vals = local_vals * in_range.to(dtype=local_vals.dtype) + + torch.distributed.all_reduce( + local_vals, op=torch.distributed.ReduceOp.SUM, group=tp_group + ) + + out_chunks.append(local_vals) + + out = torch.cat(out_chunks, dim=1) if len(out_chunks) > 1 else out_chunks[0] + + if not inference_only: + ctx.save_for_backward(vocab_parallel_logits, global_indices) + ctx.chunk_size = int(chunk_size) + ctx.tp_group = tp_group + ctx.vocab_start_index = int(vocab_start_index) + ctx.vocab_end_index = int(vocab_end_index) + + return out.contiguous() + + @staticmethod + def backward( + ctx: Any, *grad_outputs: torch.Tensor + ) -> tuple[torch.Tensor, None, None, None, None, None, None]: + grad_output = grad_outputs[0] # [B, S, K] + vocab_parallel_logits, global_indices = ctx.saved_tensors + chunk_size: int = ctx.chunk_size + tp_group = ctx.tp_group + vocab_start_index = ctx.vocab_start_index + vocab_end_index = ctx.vocab_end_index + + B, S, V_local = 
vocab_parallel_logits.shape + num_chunks = (int(S) + chunk_size - 1) // chunk_size + all_grad_input: list[torch.Tensor] = [] + + for chunk_idx in range(num_chunks): + s0 = chunk_idx * chunk_size + s1 = min(int(S), (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, s0:s1, :].to(dtype=torch.float32) + log_probs = _compute_distributed_log_softmax(logits, group=tp_group) + softmax_output = log_probs.exp() + + gi = global_indices[:, s0:s1, :] + in_range = (gi >= int(vocab_start_index)) & (gi < int(vocab_end_index)) + li = (gi - int(vocab_start_index)).clamp(min=0, max=V_local - 1) + + # Sum over K for the softmax term + go_chunk = grad_output[:, s0:s1, :] # [B, Sc, K] + go_sum = go_chunk.sum(dim=-1, keepdim=True) # [B, Sc, 1] + + grad_input = softmax_output.neg() + grad_input = grad_input.mul_(go_sum) + + # Positive scatter term: add gradients to selected indices + # Mask grad_output for indices not on this shard + go_masked = go_chunk * in_range.to(dtype=go_chunk.dtype) + # Flatten for scatter_add + flat_grad = grad_input.view(-1) + # compute flattened indices positions + Bc, Sc = go_masked.shape[0], go_masked.shape[1] + # row offset per [B, Sc] + row = ( + torch.arange(Bc, device=grad_input.device) + .view(-1, 1) + .expand(-1, Sc) + .reshape(-1) + ) + col = torch.arange(Sc, device=grad_input.device).expand(Bc, -1).reshape(-1) + flat_idx_base = (row * Sc + col) * V_local # [Bc*Sc] + # selected flat indices + flat_li = li.reshape(-1, li.shape[-1]) # [Bc*Sc, K] + flat_base_expanded = flat_idx_base.unsqueeze(-1).expand_as(flat_li) + flat_chosen = (flat_base_expanded + flat_li).reshape(-1) + flat_go = go_masked.reshape(-1) + flat_grad.scatter_add_(0, flat_chosen, flat_go) + + all_grad_input.append(grad_input) + + grad_input_total = ( + torch.cat(all_grad_input, dim=1) + if len(all_grad_input) > 1 + else all_grad_input[0] + ) + + return grad_input_total, None, None, None, None, None, None + + def dtensor_from_parallel_logits_to_logprobs( 
vocab_parallel_logits: torch.Tensor, target: DTensor | torch.Tensor, @@ -149,6 +389,7 @@ def dtensor_from_parallel_logits_to_logprobs( tp_group: torch.distributed.ProcessGroup, inference_only: bool = False, seq_index: Optional[torch.Tensor] = None, + chunk_size: Optional[int] = None, ) -> torch.Tensor: """Get log probabilities from TP+CP sharded vocab logits. @@ -163,6 +404,7 @@ def dtensor_from_parallel_logits_to_logprobs( inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. seq_index (Optional[torch.Tensor]): Sequence index tensor with shape [seq_len]. It is only provided for cp sharded logits. It represents how tensor is sharded across the sequence dimension. + chunk_size (Optional[int]): Sequence dimension chunk size for computing the log probabilities. Returns: torch.Tensor: Log probabilities tensor with shape [batch_size, seq_len-1]. @@ -194,23 +436,34 @@ def dtensor_from_parallel_logits_to_logprobs( else: target = target.roll(shifts=-1, dims=-1) - probs: torch.Tensor = DistributedLogprob.apply( # type: ignore - vocab_parallel_logits, - target, - vocab_start_index, - vocab_end_index, - tp_group, - inference_only, - ).contiguous() + if chunk_size is not None: + logprobs: torch.Tensor = ChunkedDistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + target, + vocab_start_index, + vocab_end_index, + chunk_size, + tp_group, + inference_only, + ).contiguous() + else: + logprobs: torch.Tensor = DistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + target, + vocab_start_index, + vocab_end_index, + tp_group, + inference_only, + ).contiguous() if cp_size > 1: - # probs is sharded on the sequence dimension. + # logprobs is sharded on the sequence dimension. # Get full sequence tensor, vocab dim has been reduced already. 
- probs_dtensor = DTensor.from_local(probs, cp_mesh, cp_placements) - probs = probs_dtensor.full_tensor()[:, sorted_indices] - assert probs.shape == target_shape + logprobs_dtensor = DTensor.from_local(logprobs, cp_mesh, cp_placements) + logprobs = logprobs_dtensor.full_tensor()[:, sorted_indices] + assert logprobs.shape == target_shape - return probs[:, :-1] + return logprobs[:, :-1] def from_parallel_logits_to_logprobs( @@ -221,6 +474,7 @@ def from_parallel_logits_to_logprobs( tp_group: torch.distributed.ProcessGroup, inference_only: bool = False, cp_group: Optional[torch.distributed.ProcessGroup] = None, + chunk_size: Optional[int] = None, ) -> torch.Tensor: """Get log probabilities from TP+CP sharded vocab logits. @@ -234,6 +488,7 @@ def from_parallel_logits_to_logprobs( tp_group (torch.distributed.ProcessGroup): Process group for distributed communication. inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. cp_group (torch.distributed.ProcessGroup, optional): Context parallelism process group. Defaults to None. + chunk_size (int, optional): Sequence dimension chunk size for computing the log probabilities. Returns: torch.Tensor: Log probabilities tensor with shape [batch_size, seq_len-1]. 
@@ -254,25 +509,36 @@ def from_parallel_logits_to_logprobs( cp_rank = torch.distributed.get_rank(cp_group) target = _get_tokens_on_this_cp_rank(target, cp_rank, cp_size, seq_dim=1) - probs: torch.Tensor = DistributedLogprob.apply( # type: ignore - vocab_parallel_logits, - target, - vocab_start_index, - vocab_end_index, - tp_group, - inference_only, - ).contiguous() + if chunk_size is not None: + logprobs: torch.Tensor = ChunkedDistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + target, + vocab_start_index, + vocab_end_index, + chunk_size, + tp_group, + inference_only, + ).contiguous() + else: + logprobs: torch.Tensor = DistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + target, + vocab_start_index, + vocab_end_index, + tp_group, + inference_only, + ).contiguous() if cp_size > 1: # we need to gather the logits by context parallelism - probs = allgather_cp_sharded_tensor( - probs, cp_group, seq_dim=1 + logprobs = allgather_cp_sharded_tensor( + logprobs, cp_group, seq_dim=1 ) # , unpadded_seqlen=target.shape[1]) if pad_len > 0: - probs = probs[:, :-pad_len] + logprobs = logprobs[:, :-pad_len] - return probs[:, :-1] + return logprobs[:, :-1] def from_parallel_logits_to_logprobs_packed_sequences( @@ -285,6 +551,7 @@ def from_parallel_logits_to_logprobs_packed_sequences( group: torch.distributed.ProcessGroup, inference_only: bool = False, cp_group: Optional[torch.distributed.ProcessGroup] = None, + chunk_size: Optional[int] = None, ) -> torch.Tensor: """Get log probabilities from TP sharded vocab logits for packed sequences. @@ -301,6 +568,7 @@ def from_parallel_logits_to_logprobs_packed_sequences( group (torch.distributed.ProcessGroup): Process group for distributed communication. inference_only (bool, optional): If True, tensors won't be saved for backward pass. Defaults to False. cp_group (torch.distributed.ProcessGroup, optional): Context parallelism process group. Defaults to None. 
+ chunk_size (int, optional): Sequence dimension chunk size for computing the log probabilities. Returns: torch.Tensor: Unpacked log probabilities tensor with shape [batch_size, unpacked_seqlen-1]. @@ -334,14 +602,25 @@ def from_parallel_logits_to_logprobs_packed_sequences( vocab_parallel_logits = vocab_parallel_logits.unsqueeze(0) # Apply distributed log probability computation - probs: torch.Tensor = DistributedLogprob.apply( # type: ignore - vocab_parallel_logits, - rolled_targets, - vocab_start_index, - vocab_end_index, - group, - inference_only, - ).contiguous() + if chunk_size is not None: + probs: torch.Tensor = ChunkedDistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + rolled_targets, + vocab_start_index, + vocab_end_index, + chunk_size, + group, + inference_only, + ).contiguous() + else: + probs: torch.Tensor = DistributedLogprob.apply( # type: ignore + vocab_parallel_logits, + rolled_targets, + vocab_start_index, + vocab_end_index, + group, + inference_only, + ).contiguous() # Remove batch dimension for filtering probs = probs.squeeze(0) @@ -494,3 +773,284 @@ def backward(ctx, grad_output): ) return grad_input, None, None # , None + + +def get_logprobs_from_vocab_parallel_logits( + vocab_parallel_logits: DTensor, + input_ids: torch.Tensor | DTensor, + seq_index: Optional[torch.Tensor] = None, + chunk_size: Optional[int] = None, +): + """Computes log probabilities from vocabulary-parallel logits. + + This function takes logits that are sharded across the vocabulary dimension (tensor parallel) + and computes the log probabilities for the given input IDs. + + Args: + vocab_parallel_logits (DTensor): Logits distributed across tensor parallel workers, + with shape [batch_size, seq_len, vocab_size/tp_size]. + input_ids (torch.Tensor | DTensor): Input token IDs for which to compute log probabilities, + with shape [batch_size, seq_len]. + seq_index (Optional[torch.Tensor]): Sequence index for the input IDs, + with shape [sequence_length]. 
+ chunk_size (Optional[int]): Sequence dimension chunk size for computing log probabilities. + + Returns: + torch.Tensor: Log probabilities for the given input IDs. + """ + device_mesh = vocab_parallel_logits.device_mesh + if seq_index is not None: + assert ( + device_mesh.mesh_dim_names is not None + and "cp" in device_mesh.mesh_dim_names + ), "seq_index must be provided for cp sharded logits" + + tp_size = 1 + + tp_group = device_mesh.get_group("tp") + tp_rank = tp_group.rank() + tp_size = tp_group.size() + + vocab_interval_per_rank = vocab_parallel_logits.shape[-1] // tp_size + + return dtensor_from_parallel_logits_to_logprobs( + vocab_parallel_logits.to_local(), + input_ids, + vocab_interval_per_rank * tp_rank, + (tp_rank + 1) * vocab_interval_per_rank, + tp_group, + inference_only=not torch.is_grad_enabled(), + seq_index=seq_index, + chunk_size=chunk_size, + ) + + +@torch.no_grad() +def distributed_vocab_topk( + vocab_parallel_logits: torch.Tensor, + k: int, + tp_group: torch.distributed.ProcessGroup, + *, + vocab_start_index: int, + vocab_end_index: int, + chunk_size: Optional[int] = None, +) -> tuple[torch.Tensor, torch.Tensor]: + """Compute global top-k over TP-sharded vocabulary logits. 
+ + Args: + vocab_parallel_logits: [B, S, V_local] + k: number of top tokens to select globally + tp_group: tensor-parallel process group + vocab_start_index: global vocab start for this rank (inclusive) + vocab_end_index: global vocab end for this rank (exclusive) + chunk_size: optional chunk along sequence dim to bound memory + + Returns: + topk_vals: [B, S, k] + topk_global_indices: [B, S, k] (global token ids) + """ + assert vocab_end_index > vocab_start_index + world_size = torch.distributed.get_world_size(tp_group) + + logits = vocab_parallel_logits.to(dtype=torch.float32) + B, S, V_local = logits.shape + V_total = V_local * world_size + K_eff = int(min(k, max(1, V_total))) + + if chunk_size is None: + chunk_size = S + + vals_chunks: list[torch.Tensor] = [] + idx_chunks: list[torch.Tensor] = [] + + for s0 in range(0, S, chunk_size): + s1 = min(S, s0 + chunk_size) + # local top-k on this TP rank + local_vals, local_idx_local = torch.topk( + logits[:, s0:s1, :], min(k, V_local), dim=-1 + ) + local_idx_global = local_idx_local + int(vocab_start_index) + + # gather candidates from all TP ranks + gathered_vals = [torch.empty_like(local_vals) for _ in range(world_size)] + gathered_idx = [torch.empty_like(local_idx_global) for _ in range(world_size)] + torch.distributed.all_gather(gathered_vals, local_vals, group=tp_group) + torch.distributed.all_gather(gathered_idx, local_idx_global, group=tp_group) + + all_vals = torch.cat(gathered_vals, dim=-1) + all_idx = torch.cat(gathered_idx, dim=-1) + + sel_vals, sel_pos = torch.topk(all_vals, K_eff, dim=-1) + sel_idx = torch.gather(all_idx, dim=-1, index=sel_pos) + + vals_chunks.append(sel_vals) + idx_chunks.append(sel_idx) + + topk_vals = ( + torch.cat(vals_chunks, dim=1) if len(vals_chunks) > 1 else vals_chunks[0] + ) + topk_global_indices = ( + torch.cat(idx_chunks, dim=1) if len(idx_chunks) > 1 else idx_chunks[0] + ) + + return topk_vals, topk_global_indices + + +def gather_logits_at_global_indices( + 
vocab_parallel_logits: torch.Tensor, + global_indices: torch.Tensor, + tp_group: Optional[torch.distributed.ProcessGroup] = None, + cp_group: Optional[torch.distributed.ProcessGroup] = None, + *, + vocab_start_index: int, + vocab_end_index: int, + chunk_size: Optional[int] = None, +) -> torch.Tensor: + """Gather student logits at given global token indices under TP+CP sharding. + + Differentiable w.r.t. vocab_parallel_logits. + + Args: + vocab_parallel_logits: [B, S_cp, V_local] where S_cp is CP sharded sequence length + global_indices: [B, S_full, k] where S_full is full sequence length + tp_group: Optional tensor-parallel process group. If None, treats logits as full-vocab (no TP) and skips TP all-reduce. + vocab_start_index: global vocab start for this rank (inclusive) + vocab_end_index: global vocab end for this rank (exclusive) + chunk_size: optional chunk along sequence dim to bound memory + cp_group: Optional context-parallel process group + + Returns: + gathered_logits: [B, S_full, k] + """ + # CP support: get CP group and size + cp_size = 1 if cp_group is None else torch.distributed.get_world_size(cp_group) + + # Handle CP sharding of global_indices (similar to from_parallel_logits_to_logprobs) + pad_len = 0 + if cp_size > 1: + # Pad the global_indices to local size * cp_size if needed + pad_len = vocab_parallel_logits.shape[1] * cp_size - global_indices.shape[1] + if pad_len > 0: + global_indices = torch.nn.functional.pad( + global_indices, (0, 0, 0, pad_len), value=0 + ) + + # Shard the global_indices by context parallelism + cp_rank = torch.distributed.get_rank(cp_group) + global_indices = _get_tokens_on_this_cp_rank( + global_indices, cp_rank, cp_size, seq_dim=1 + ) + + logits = vocab_parallel_logits.to(dtype=torch.float32) + B, S, V_local = logits.shape + if chunk_size is None: + chunk_size = S + + out_chunks: list[torch.Tensor] = [] + for s0 in range(0, S, chunk_size): + s1 = min(S, s0 + chunk_size) + gi = global_indices[:, s0:s1, :] + + in_range = 
(gi >= int(vocab_start_index)) & (gi < int(vocab_end_index)) + # Map global ids to local shard ids and clamp to valid range to avoid OOB gather + V_local = logits.shape[-1] + li = (gi - int(vocab_start_index)).clamp(min=0, max=V_local - 1) + + local_vals = torch.gather(logits[:, s0:s1, :], dim=-1, index=li) + local_vals = local_vals * in_range.to(dtype=local_vals.dtype) + + if tp_group is not None: + torch.distributed.all_reduce( + local_vals, op=torch.distributed.ReduceOp.SUM, group=tp_group + ) + out_chunks.append(local_vals) + + gathered_logits = ( + torch.cat(out_chunks, dim=1) if len(out_chunks) > 1 else out_chunks[0] + ) + + # CP gather: gather the logits by context parallelism + if cp_size > 1: + gathered_logits = allgather_cp_sharded_tensor( + gathered_logits, cp_group, seq_dim=1 + ) + + # Remove padding if we added it earlier + if pad_len > 0: + gathered_logits = gathered_logits[:, :-pad_len, :] + + return gathered_logits + + +class ChunkedDistributedEntropy(torch.autograd.Function): + """Compute H_all = sum_v p_v log p_v across TP with chunking over sequence. + + Forward returns [B, S] tensor of global entropy; backward propagates through logits. 
+ """ + + @staticmethod + def forward( # pyrefly: ignore[bad-override] + ctx: Any, + vocab_parallel_logits: torch.Tensor, # [B, S, V_local] + chunk_size: int, + tp_group: torch.distributed.ProcessGroup, + inference_only: bool = False, + ) -> torch.Tensor: + B, S, _ = vocab_parallel_logits.shape + num_chunks = (int(S) + chunk_size - 1) // chunk_size + out_chunks: list[torch.Tensor] = [] + + for chunk_idx in range(num_chunks): + s0 = chunk_idx * chunk_size + s1 = min(int(S), (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, s0:s1, :].to(dtype=torch.float32) + log_probs = _compute_distributed_log_softmax(logits, group=tp_group) + softmax_output = log_probs.exp() + H_local = (softmax_output * log_probs).sum(dim=-1) # [B, Sc] + torch.distributed.all_reduce( + H_local, op=torch.distributed.ReduceOp.SUM, group=tp_group + ) + out_chunks.append(H_local) + + H_all = torch.cat(out_chunks, dim=1) if len(out_chunks) > 1 else out_chunks[0] + + if not inference_only: + ctx.save_for_backward(vocab_parallel_logits) + ctx.chunk_size = int(chunk_size) + ctx.tp_group = tp_group + + return H_all.contiguous() + + @staticmethod + def backward( + ctx: Any, *grad_outputs: torch.Tensor + ) -> tuple[torch.Tensor, None, None, None]: + grad_output = grad_outputs[0] # [B, S] + (vocab_parallel_logits,) = ctx.saved_tensors + chunk_size: int = ctx.chunk_size + tp_group = ctx.tp_group + + B, S, V_local = vocab_parallel_logits.shape + num_chunks = (int(S) + chunk_size - 1) // chunk_size + grads: list[torch.Tensor] = [] + + for chunk_idx in range(num_chunks): + s0 = chunk_idx * chunk_size + s1 = min(int(S), (chunk_idx + 1) * chunk_size) + + logits = vocab_parallel_logits[:, s0:s1, :].to(dtype=torch.float32) + log_probs = _compute_distributed_log_softmax(logits, group=tp_group) + softmax_output = log_probs.exp() + H_local = (softmax_output * log_probs).sum(dim=-1) + torch.distributed.all_reduce( + H_local, op=torch.distributed.ReduceOp.SUM, group=tp_group + ) + + # dH/dz = softmax * 
(log_probs - H_all) + grad_chunk = softmax_output * (log_probs - H_local.unsqueeze(-1)) + grad_chunk.mul_(grad_output[:, s0:s1].unsqueeze(-1)) + grads.append(grad_chunk) + + grad_input = torch.cat(grads, dim=1) if len(grads) > 1 else grads[0] + return grad_input, None, None, None diff --git a/nemo_rl/distributed/ray_actor_environment_registry.py b/nemo_rl/distributed/ray_actor_environment_registry.py index 277619bb92..6a3529d4a1 100644 --- a/nemo_rl/distributed/ray_actor_environment_registry.py +++ b/nemo_rl/distributed/ray_actor_environment_registry.py @@ -12,16 +12,37 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os + from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES +USE_SYSTEM_EXECUTABLE = os.environ.get("NEMO_RL_PY_EXECUTABLES_SYSTEM", "0") == "1" +VLLM_EXECUTABLE = ( + PY_EXECUTABLES.SYSTEM if USE_SYSTEM_EXECUTABLE else PY_EXECUTABLES.VLLM +) +MCORE_EXECUTABLE = ( + PY_EXECUTABLES.SYSTEM if USE_SYSTEM_EXECUTABLE else PY_EXECUTABLES.MCORE +) + ACTOR_ENVIRONMENT_REGISTRY: dict[str, str] = { - "nemo_rl.models.generation.vllm.VllmGenerationWorker": PY_EXECUTABLES.VLLM, + "nemo_rl.models.generation.vllm.vllm_worker.VllmGenerationWorker": VLLM_EXECUTABLE, + "nemo_rl.models.generation.vllm.vllm_worker_async.VllmAsyncGenerationWorker": VLLM_EXECUTABLE, # Temporary workaround for the coupled implementation of DTensorPolicyWorker and vLLM. # This will be reverted to PY_EXECUTABLES.BASE once https://github.com/NVIDIA-NeMo/RL/issues/501 is resolved. 
- "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker": PY_EXECUTABLES.VLLM, - "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker": PY_EXECUTABLES.MCORE, + "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker": VLLM_EXECUTABLE, + "nemo_rl.models.policy.dtensor_policy_worker_v2.DTensorPolicyWorkerV2": PY_EXECUTABLES.AUTOMODEL, + "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker": MCORE_EXECUTABLE, "nemo_rl.environments.math_environment.MathEnvironment": PY_EXECUTABLES.SYSTEM, + "nemo_rl.environments.vlm_environment.VLMEnvironment": PY_EXECUTABLES.SYSTEM, + "nemo_rl.environments.code_environment.CodeEnvironment": PY_EXECUTABLES.SYSTEM, + "nemo_rl.environments.reward_model_environment.RewardModelEnvironment": PY_EXECUTABLES.SYSTEM, "nemo_rl.environments.games.sliding_puzzle.SlidingPuzzleEnv": PY_EXECUTABLES.SYSTEM, + # AsyncTrajectoryCollector needs vLLM environment to handle exceptions from VllmGenerationWorker + "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector": PY_EXECUTABLES.VLLM, + # ReplayBuffer needs vLLM environment to handle trajectory data from VllmGenerationWorker + "nemo_rl.algorithms.async_utils.ReplayBuffer": PY_EXECUTABLES.VLLM, + "nemo_rl.environments.tools.retriever.RAGEnvironment": PY_EXECUTABLES.SYSTEM, + "nemo_rl.environments.penguin.Penguin": PY_EXECUTABLES.PENGUIN, } diff --git a/nemo_rl/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py index 6e0a75b880..c28befb541 100644 --- a/nemo_rl/distributed/virtual_cluster.py +++ b/nemo_rl/distributed/virtual_cluster.py @@ -21,6 +21,7 @@ from ray.util.placement_group import ( PlacementGroup, placement_group, + placement_group_table, remove_placement_group, ) from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy @@ -48,25 +49,37 @@ class PY_EXECUTABLES: # Use NeMo-RL direct dependencies and vllm. 
VLLM = "uv run --locked --extra vllm" - # Megatron-core (and nemo dependencies) - # We always run with --reinstall to avoid issues where someone runs "uv run ... --extra mcore ..." - # but the submodules are not downloaded yet. This results in errors where it appears Megatron/Nemo - # aren't installed. Simple workaround is to always run the mcore py_executable with --reinstall. - MCORE = "uv run --reinstall --extra mcore" + # Use NeMo-RL direct dependencies and nemo-automodel. + AUTOMODEL = "uv run --locked --extra automodel" + + # Use NeMo-RL direct dependencies and Megatron. + MCORE = "uv run --locked --extra mcore" + + # Use Penguin dependencies + PENGUIN = "uv run --locked --extra penguin" @ray.remote # pragma: no cover def _get_node_ip_and_free_port() -> tuple[str, int]: - import socket + return _get_node_ip_local(), _get_free_port_local() + +def _get_node_ip_local() -> str: # Get the IP address of the current node node_ip = ray._private.services.get_node_ip_address() + return node_ip + + +def _get_free_port_local() -> int: + import socket + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) # Bind to port 0 to get a random free port s.listen(1) port = s.getsockname()[1] - return node_ip, port + + return port def init_ray(log_dir: Optional[str] = None) -> None: @@ -75,6 +88,9 @@ def init_ray(log_dir: Optional[str] = None) -> None: Try to attach to an existing local cluster. If that cluster uses the same CUDA_VISIBLE_DEVICES or Slurm managed tag we will reuse it. Otherwise, we will detach and start a fresh local cluster. + + Args: + log_dir: Optional directory to store Ray logs and temp files. 
""" # Set up runtime environment env_vars = dict(os.environ) @@ -155,6 +171,14 @@ def init_ray(log_dir: Optional[str] = None) -> None: ) +@ray.remote(num_gpus=1) +class GetGPUIDActor: # pragma: no cover + """Util actor class to return GPU id of the current worker.""" + + def get_gpu_id(self): + return ray.get_gpu_ids()[0] + + class ResourceInsufficientError(Exception): """Exception raised when the cluster does not have enough resources to satisfy the requested configuration.""" @@ -195,6 +219,7 @@ def __init__( self._bundle_ct_per_node_list = bundle_ct_per_node_list self._world_size = sum(self._bundle_ct_per_node_list) self._node_placement_groups: Optional[list[PlacementGroup]] = None + self._sorted_bundle_indices: Optional[list[int]] = None self.num_gpus_per_node = num_gpus_per_node self.use_gpus = use_gpus @@ -236,6 +261,8 @@ def _init_placement_groups( self._node_placement_groups = self._create_placement_groups_internal( strategy, use_unified_pg ) + if use_unified_pg and self.use_gpus: + self._sorted_bundle_indices = self._get_sorted_bundle_indices() return self._node_placement_groups except ResourceInsufficientError as e: print(e) @@ -345,8 +372,10 @@ def world_size(self) -> int: def node_count(self) -> int: return sum(1 for count in self._bundle_ct_per_node_list if count > 0) - def get_master_address_and_port(self) -> tuple[str, int]: - """Gets the master address and port for the distributed training setup. + def get_available_address_and_port( + self, pg_idx: int, bundle_idx: int + ) -> tuple[str, int]: + """Gets an available address and port for the given placement group index and bundle index. 
Returns: Tuple of (address, port) @@ -355,15 +384,19 @@ def get_master_address_and_port(self) -> tuple[str, int]: if not self._node_placement_groups: self.get_placement_groups() - # Use the first bundle of the first placement group - # This works for both unified PG and per-node PGs - pg = self.get_placement_groups()[0] + # Get the placement group + placement_groups = self.get_placement_groups() + if len(placement_groups) == 1: + pg = placement_groups[0] + else: + pg = placement_groups[pg_idx] + if pg.bundle_specs: - # Launch port finder on the first bundle of this placement group + # Launch port finder on the given bundle of this placement group addr, port = ray.get( _get_node_ip_and_free_port.options( scheduling_strategy=PlacementGroupSchedulingStrategy( - placement_group=pg, placement_group_bundle_index=0 + placement_group=pg, placement_group_bundle_index=bundle_idx ), # Need to explicitly set to 0 since it's possible for this to be unschedulable if all CPUs are already in use. num_cpus=0, @@ -371,7 +404,75 @@ def get_master_address_and_port(self) -> tuple[str, int]: ) return addr, port - raise RuntimeError("No valid placement groups found to get master address") + raise RuntimeError( + "No valid placement groups found to get available address and port" + ) + + def get_master_address_and_port(self) -> tuple[str, int]: + """Gets the master address and port for the distributed training setup. 
+ + Returns: + Tuple of (address, port) + """ + # Get placement groups if not already created + if not self._node_placement_groups: + self.get_placement_groups() + + # If sorted bundle indices are available, get the address and port for the first bundle index + if self._sorted_bundle_indices is not None: + return self.get_available_address_and_port( + pg_idx=0, bundle_idx=self._sorted_bundle_indices[0] + ) + + # Otherwise, get the address and port for bundle index 0 + return self.get_available_address_and_port(pg_idx=0, bundle_idx=0) + + def _get_sorted_bundle_indices(self) -> Optional[list[int]]: + """Gets the sorted bundle indices for the placement groups.""" + if self._node_placement_groups is None: + raise ValueError( + "Placement groups must be initialized before calling _get_sorted_bundle_indices" + ) + + if not self.use_gpus: + return None + + if len(self._node_placement_groups) != 1: + return None + + pg = self._node_placement_groups[0] + pg_data = placement_group_table(pg) + num_bundles = len(pg_data["bundles"]) + bundle_to_node_ids = pg_data["bundles_to_node_id"] + + # use info actor to get the GPU id + info_actors = [] + for i in range(num_bundles): + info_actors.append( + GetGPUIDActor.options( + num_cpus=0.01, # set both num_cpus and num_gpus to be small values to enable assignment in colocated case + num_gpus=0.01, + resources=None, + scheduling_strategy=PlacementGroupSchedulingStrategy( + placement_group=pg, + placement_group_bundle_index=i, + ), + ).remote() + ) + + gpu_ids = ray.get([actor.get_gpu_id.remote() for actor in info_actors]) + for actor in info_actors: + ray.kill(actor) + + # original index, node_id, gpu_id + bundle_infos = [ + (i, bundle_to_node_ids[i], gpu_ids[i]) for i in range(num_bundles) + ] + pg_reordered_bundle_indices = [ + bundle_info[0] + for bundle_info in sorted(bundle_infos, key=lambda x: (x[1], x[2])) + ] # sort by node_id, then gpu_id + return pg_reordered_bundle_indices def shutdown(self) -> bool: """Cleans up and 
releases all resources associated with this virtual cluster. diff --git a/nemo_rl/distributed/worker_group_utils.py b/nemo_rl/distributed/worker_group_utils.py index fe2a9a03be..c51d3b8a7f 100644 --- a/nemo_rl/distributed/worker_group_utils.py +++ b/nemo_rl/distributed/worker_group_utils.py @@ -57,6 +57,7 @@ def get_nsight_config_if_pattern_matches(worker_name: str) -> dict[str, Any]: # Profile will only start/stop when torch.cuda.profiler.start()/stop() is called "capture-range": "cudaProfilerApi", "capture-range-end": "stop", + "cuda-graph-trace": "node", } } diff --git a/nemo_rl/distributed/worker_groups.py b/nemo_rl/distributed/worker_groups.py index b625be25dc..e4045183c2 100644 --- a/nemo_rl/distributed/worker_groups.py +++ b/nemo_rl/distributed/worker_groups.py @@ -13,6 +13,7 @@ # limitations under the License. import importlib import os +import time from copy import deepcopy from dataclasses import dataclass from typing import Any, Optional, Union @@ -20,6 +21,7 @@ import ray from ray.util.placement_group import PlacementGroup from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy +from tqdm import tqdm from nemo_rl.distributed.named_sharding import NamedSharding from nemo_rl.distributed.ray_actor_environment_registry import ( @@ -27,7 +29,9 @@ ) from nemo_rl.distributed.virtual_cluster import RayVirtualCluster from nemo_rl.distributed.worker_group_utils import recursive_merge_options -from nemo_rl.utils.venvs import create_local_venv_on_each_node +from nemo_rl.utils.venvs import ( + create_local_venv_on_each_node, +) @dataclass @@ -457,6 +461,17 @@ def _create_workers_from_bundle_indices( # Get all placement groups placement_groups = self.cluster.get_placement_groups() + # Get available address and port for each worker + available_addresses = [] + available_ports = [] + for group_idx, (pg_idx, local_bundle_indices) in enumerate(bundle_indices_list): + for local_rank, bundle_idx in enumerate(local_bundle_indices): + addr, port = 
self.cluster.get_available_address_and_port( + pg_idx, bundle_idx + ) + available_addresses.append(addr) + available_ports.append(port) + for group_idx, (pg_idx, local_bundle_indices) in enumerate(bundle_indices_list): current_group = [] @@ -478,9 +493,17 @@ def _create_workers_from_bundle_indices( "MASTER_ADDR": self.master_address, "MASTER_PORT": str(self.master_port), "NODE_RANK": str(pg_idx), + "AVAILABLE_ADDR_LIST": str(available_addresses), + "AVAILABLE_PORT_LIST": str(available_ports), } ) + # Remove Ray-specific environment variables, let the worker itself set them. worker_env_vars.pop("RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES", None) + worker_env_vars.pop("RAY_CLIENT_MODE", None) + worker_env_vars.pop("RAY_JOB_ID", None) + worker_env_vars.pop("RAY_LD_PRELOAD", None) + worker_env_vars.pop("RAY_RAYLET_PID", None) + worker_env_vars.pop("RAY_USAGE_STATS_ENABLED", None) # Only the first worker in each group gets bundle_indices # This ensures only one worker per group is the model owner @@ -541,12 +564,39 @@ def _create_workers_from_bundle_indices( global_rank += 1 + # Wait for all workers to initialize with timing and progress bar + num_workers = len(worker_futures) + worker_refs = [future for future, _ in worker_futures] + + start_time = time.perf_counter() + + # Use ray.wait() to track individual worker completion times + remaining_refs = worker_refs.copy() + + with tqdm( + total=num_workers, + desc=f"Initializing {self.name_prefix} workers", + unit="worker", + disable=False, + ) as pbar: + while remaining_refs: + # Wait for at least one worker to complete + ready_refs, remaining_refs = ray.wait( + remaining_refs, num_returns=1, timeout=None + ) + + # Update progress bar for each ready worker + for _ in ready_refs: + pbar.update(1) + + # Get all worker results + workers = ray.get(worker_refs) + total_init_time = time.perf_counter() - start_time + print( - f"Waiting for {len(worker_futures)} workers to finish initializing...", + f" ✓ {num_workers} workers 
initialized in {total_init_time:.2f}s", flush=True, ) - worker_refs = [future for future, _ in worker_futures] - workers = ray.get(worker_refs) for idx, (worker, (_, initializer)) in enumerate(zip(workers, worker_futures)): worker._RAY_INITIALIZER_ACTOR_REF_TO_AVOID_GC = initializer diff --git a/nemo_rl/environments/code_environment.py b/nemo_rl/environments/code_environment.py new file mode 100644 index 0000000000..a1047d081b --- /dev/null +++ b/nemo_rl/environments/code_environment.py @@ -0,0 +1,268 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import ast +import builtins +import os +import re +from collections.abc import Mapping, Sequence +from contextlib import contextmanager +from copy import copy +from io import IOBase +from pprint import pformat +from types import ModuleType +from typing import Any, Dict, List, Optional, Tuple, TypedDict + +import ray +import torch + +from nemo_rl.data.interfaces import LLMMessageLogType +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.environments.interfaces import EnvironmentInterface, EnvironmentReturn +from nemo_rl.environments.utils import chunk_list_to_workers + + +class CodeEnvConfig(TypedDict): + num_workers: int + # whether to terminate the execution after expression evaluation + # if you want to execute multiple rounds of code, set this to False + # and wrap CodeEnvironment in another environment that terminates the generation + terminate_on_evaluation: bool + + +class CodeEnvMetadata(TypedDict): + context: Dict[str, Any] # Hold functions and variables defined in the code + working_dir: str # Working directory for file operations + + +@ray.remote # pragma: no cover +class CodeExecutionWorker: + """Helper class to process individual code execution steps.""" + + def __init__(self): + # Create sandbox with safe builtins + builtin_dict = {k: getattr(builtins, k) for k in dir(builtins)} + builtin_dict["open"] = self.safe_open + builtin_dict["__import__"] = self.safe_import + self.sandbox = {"__builtins__": builtin_dict} + + def sanitize(self, obj: Any) -> Any: + # TODO: better handling of unpicklable objects: custom __getstate__ and __setstate__ + # recursively remove all file objects as they are not picklable by ray + if isinstance(obj, (IOBase, ModuleType)): + # replace unpickable objects with a string representation + return repr(obj) + if isinstance(obj, Mapping): + return obj.__class__( + {self.sanitize(k): self.sanitize(v) for k, v in obj.items()} + ) + if 
isinstance(obj, Sequence) and not isinstance(obj, str): + return obj.__class__(self.sanitize(v) for v in obj) + if hasattr(obj, "__dict__"): + new_obj = copy(obj) + new_obj.__dict__ = { + self.sanitize(k): self.sanitize(v) for k, v in obj.__dict__.items() + } + return new_obj + return obj + + def format_result( + self, result: Any, code: Optional[str] = None, lookahead: Optional[str] = None + ) -> str: + if result is None: + # no return value + return "" + result = pformat(result) + multiline = (code and "\n" in code) or "\n" in result + if multiline: + # multi-line format + result = f"\n\n<result>\n{result}\n</result>" + else: + # inline format + result = f"<result>{result}</result>" + if lookahead: + if result.startswith(lookahead): + # The generation may look like "</code>\n" if ">\n" is a single token. + # We trim \n from the result if the model has already generated it. + result = result[len(lookahead) :] + return result + + def execute( + self, message_batch: str, metadata_batch: List[CodeEnvMetadata] + ) -> Tuple[List[Dict[str, str]], List[bool], List[Any]]: + """Execute code in a sandboxed environment.""" + results = [] + terminateds = [] + + for message, metadata in zip(message_batch, metadata_batch): + match = re.search(r"<code>(.*)</code>(.*)", message, re.DOTALL) + if not match: + results.append("") + terminateds.append(False) + continue + + code, lookahead = match.groups() + tree = ast.parse(code) + + if tree.body and isinstance(tree.body[-1], ast.Expr): + # Interactive mode + exec_code = ast.unparse(tree.body[:-1]) + eval_code = ast.unparse(tree.body[-1]) + else: + # Silent mode + exec_code = code + eval_code = None + + result = None + terminated = False + with self.chdir(metadata["working_dir"]): + try: + # isolate the code in a sandbox + # capture local variables in metadata["context"] + exec(exec_code, self.sandbox, metadata["context"]) + if eval_code: + result = eval(eval_code, self.sandbox, metadata["context"]) + terminated = True + except 
Exception as err: + result = err + + result = self.format_result(result, code, lookahead) + results.append(result) + terminateds.append(terminated) + + observations = [ + {"role": "environment", "content": result} for result in results + ] + metadata_batch = self.sanitize(metadata_batch) + + return observations, terminateds, metadata_batch + + @contextmanager + def chdir(self, dir: str): + """Change to temporary directory for file operations.""" + current_dir = os.getcwd() + os.chdir(dir) + try: + yield + finally: + os.chdir(current_dir) + + def safe_open(self, file: str, *args, **kwargs): + """Safe version of open() that only allows access to temporary directory.""" + real_file = os.path.realpath(file) + working_dir = os.path.realpath(os.getcwd()) + if os.path.commonpath([real_file, working_dir]) != working_dir: + raise PermissionError( + "Access beyond the temporary working directory is blocked" + ) + return open(file, *args, **kwargs) + + def safe_import(self, name: str, *args, **kwargs): + """Safe version of import that blocks risky modules.""" + risky_modules = { + "os", + "shutil", # erase filesystem + "sys", + "signal", # exit the current program + "socket", # network communication + "subprocess", + "threading", + "multiprocessing", # spawn threads or processes + "builtins", + "importlib", # bypass current blockers + } + if name in risky_modules: + raise PermissionError("Importing system and network modules is blocked") + return builtins.__import__(name, *args, **kwargs) + + +@ray.remote # pragma: no cover +class CodeEnvironment(EnvironmentInterface): + """Code execution environment that maintains state between steps.""" + + def __init__(self, cfg: CodeEnvConfig): + self.cfg = cfg + self.num_workers = cfg["num_workers"] + self.terminate_on_evaluation = cfg["terminate_on_evaluation"] + self.workers = [ + CodeExecutionWorker.options( + runtime_env={"py_executable": PY_EXECUTABLES.SYSTEM} + ).remote() + for _ in range(self.num_workers) + ] + + def step( + self, 
+ message_log_batch: List[LLMMessageLogType], + metadata_batch: List[CodeEnvMetadata], + return_extracted_answer: bool = False, + ) -> EnvironmentReturn: + """Process a batch of code execution steps.""" + message_batch = [ml[-1]["content"] for ml in message_log_batch] + chunked_message_batch = chunk_list_to_workers(message_batch, self.num_workers) + chunked_metadata_batch = chunk_list_to_workers(metadata_batch, self.num_workers) + + # Process each chunk in parallel + futures = [ + self.workers[i].execute.remote(message_chunk, metadata_chunk) + for i, (message_chunk, metadata_chunk) in enumerate( + zip(chunked_message_batch, chunked_metadata_batch) + ) + ] + + results = ray.get(futures) + + # Unpack results + observations = [] + terminateds = [] + new_metadata_batch = [] + + for obs, term, meta in results: + observations += obs + terminateds += term + new_metadata_batch += meta + + if self.terminate_on_evaluation: + terminated_tensor = torch.tensor(terminateds, dtype=torch.bool) + else: + terminated_tensor = torch.zeros(len(terminateds), dtype=torch.bool) + rewards_tensor = torch.zeros_like(terminated_tensor, dtype=torch.float32) + + next_stop_strings = [["</code>"]] * len(message_log_batch) + + assert return_extracted_answer == False, ( + "return_extracted_answer is not supported in CodeEnvironment. Please set it to False." 
+ ) + extracted_answers = None + + return EnvironmentReturn( + observations=observations, + metadata=new_metadata_batch, + next_stop_strings=next_stop_strings, + rewards=rewards_tensor, + terminateds=terminated_tensor, + answers=extracted_answers, + ) + + def shutdown(self): + # shutdown all workers + for worker in self.workers: + ray.kill(worker) + + def global_post_process_and_metrics( + self, batch: BatchedDataDict + ) -> Tuple[BatchedDataDict, dict]: + """Compute metrics for the batch.""" + # No specific metrics for code execution + return batch, {} diff --git a/nemo_rl/environments/dapo_math_verifier.py b/nemo_rl/environments/dapo_math_verifier.py new file mode 100644 index 0000000000..e84b852e49 --- /dev/null +++ b/nemo_rl/environments/dapo_math_verifier.py @@ -0,0 +1,283 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# Copyright 2024 Bytedance Ltd. and/or its affiliates +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Adapted from https://github.com/volcengine/verl/blob/main/verl/utils/reward_score/math_dapo.py + +import re +from typing import Optional + + +def last_boxed_only_string(string: str) -> Optional[str]: + """Extract the last LaTeX boxed expression from a string. 
+ + Args: + string: Input string containing LaTeX code + + Returns: + The last boxed expression or None if not found + """ + idx = string.rfind("\\boxed{") + if idx < 0: + return None + + i = idx + right_brace_idx = None + num_left_braces_open = 0 + + while i < len(string): + if string[i] == "{": + num_left_braces_open += 1 + if string[i] == "}": + num_left_braces_open -= 1 + if num_left_braces_open == 0: + right_brace_idx = i + break + i += 1 + + return string[idx : right_brace_idx + 1] if right_brace_idx is not None else None + + +def remove_boxed(s: str) -> str: + r"""Remove the LaTeX boxed command from a string. + + Args: + s: String with format "\\boxed{content}" + + Returns: + The content inside the boxed command + """ + left = "\\boxed{" + assert s[: len(left)] == left, f"box error: {s}" + assert s[-1] == "}", f"box error: {s}" + return s[len(left) : -1] + + +# Constants for normalization +SUBSTITUTIONS = [ + ("an ", ""), + ("a ", ""), + (".$", "$"), + ("\\$", ""), + (r"\ ", ""), + (" ", ""), + ("mbox", "text"), + (",\\text{and}", ","), + ("\\text{and}", ","), + ("\\text{m}", "\\text{}"), +] + +REMOVED_EXPRESSIONS = [ + "square", + "ways", + "integers", + "dollars", + "mph", + "inches", + "hours", + "km", + "units", + "\\ldots", + "sue", + "points", + "feet", + "minutes", + "digits", + "cents", + "degrees", + "cm", + "gm", + "pounds", + "meters", + "meals", + "edges", + "students", + "childrentickets", + "multiples", + "\\text{s}", + "\\text{.}", + "\\text{\ns}", + "\\text{}^2", + "\\text{}^3", + "\\text{\n}", + "\\text{}", + r"\mathrm{th}", + r"^\circ", + r"^{\circ}", + r"\;", + r",\!", + "{,}", + '"', + "\\dots", +] + + +def normalize_final_answer(final_answer: str) -> str: + """Normalize a final answer to a quantitative reasoning question. 
+ + Args: + final_answer: The answer string to normalize + + Returns: + Normalized answer string + """ + final_answer = final_answer.split("=")[-1] + + # Apply substitutions and removals + for before, after in SUBSTITUTIONS: + final_answer = final_answer.replace(before, after) + for expr in REMOVED_EXPRESSIONS: + final_answer = final_answer.replace(expr, "") + + # Extract and normalize LaTeX math + final_answer = re.sub(r"(.*?)(\$)(.*?)(\$)(.*)", "$\\3$", final_answer) + final_answer = re.sub(r"(\\text\{)(.*?)(\})", "\\2", final_answer) + final_answer = re.sub(r"(\\textbf\{)(.*?)(\})", "\\2", final_answer) + final_answer = re.sub(r"(\\overline\{)(.*?)(\})", "\\2", final_answer) + final_answer = re.sub(r"(\\boxed\{)(.*)(\})", "\\2", final_answer) + + # Normalize shorthand TeX: + # \fracab -> \frac{a}{b} + # \frac{abc}{bef} -> \frac{abc}{bef} + # \fracabc -> \frac{a}{b}c + # \sqrta -> \sqrt{a} + # \sqrtab -> sqrt{a}b + final_answer = re.sub(r"(frac)([^{])(.)", "frac{\\2}{\\3}", final_answer) + final_answer = re.sub(r"(sqrt)([^{])", "sqrt{\\2}", final_answer) + final_answer = final_answer.replace("$", "") + + # Normalize numbers + if final_answer.replace(",", "").isdigit(): + final_answer = final_answer.replace(",", "") + + return final_answer.strip() + + +def is_correct_minerva( + solution_str: str, + gt: str, + gt_need_extract: bool = False, + answer_pattern: str = r"(?i)Answer\s*:\s*([^\n]+)", +) -> tuple[bool, str]: + """Check if the solution is correct according to Minerva criteria. 
+ + Args: + solution_str: The solution string to check + gt: The ground truth answer + gt_need_extract: Whether the ground truth needs extraction + answer_pattern: Regex pattern to extract the answer + + Returns: + Tuple of (is_correct, normalized_prediction) + """ + # Extract answer from solution + match = re.findall(answer_pattern, solution_str) + extracted_answer = match[-1] if match else "[INVALID]" + pred = normalize_final_answer(extracted_answer) + + # Process ground truth + if gt_need_extract: + gt = normalize_final_answer(remove_boxed(last_boxed_only_string(gt))) + else: + gt = normalize_final_answer(gt) + + return (pred == gt), pred + + +def is_correct_strict_box( + pred: str, gt: str, pause_tokens_index: Optional[list[int]] = None +) -> tuple[int, Optional[str]]: + """Check if the prediction is correct using strict boxed answer criteria. + + Args: + pred: The prediction string + gt: The ground truth answer + pause_tokens_index: Indices of pause tokens + + Returns: + Tuple of (score, extracted_prediction) + """ + # Extract the relevant part of the prediction + if pause_tokens_index is not None: + assert len(pause_tokens_index) == 4 + pred = pred[pause_tokens_index[-1] - 100 :] + else: + pred = pred[-100:] + + # Extract and check the boxed answer + boxed_pred = last_boxed_only_string(pred) + extracted_pred = remove_boxed(boxed_pred) if boxed_pred is not None else None + + return 1 if (extracted_pred == gt) else -1, extracted_pred + + +def verify( + solution_str: str, + answer: str, + strict_box_verify: bool = False, + pause_tokens_index: Optional[list[int]] = None, +) -> bool: + """Verify if the solution is correct. 
+ + Args: + solution_str: The solution string to verify + answer: The ground truth answer + strict_box_verify: Whether to use strict box verification + pause_tokens_index: Indices of pause tokens + + Returns: + True if the solution is correct, False otherwise + """ + if strict_box_verify: + correct, pred = is_correct_strict_box(solution_str, answer, pause_tokens_index) + return correct == 1, pred + + correct, pred = is_correct_minerva(solution_str, answer) + return correct, pred + + +def compute_score( + solution_str: str, + ground_truth: str, + strict_box_verify: bool = False, + pause_tokens_index: Optional[list[int]] = None, +) -> float: + """Compute the reward score for a solution. + + Args: + solution_str: The solution string + ground_truth: The ground truth answer + strict_box_verify: Whether to use strict box verification + pause_tokens_index: Indices of pause tokens + + Returns: + Reward score (1.0 for correct, 0.0 for incorrect) + """ + # Limit solution length for efficiency + solution_str = solution_str[ + -300: + ] # The longest answer in MATH-500 has 159 characters + + # Verify the solution + correct, pred = verify( + solution_str, ground_truth, strict_box_verify, pause_tokens_index + ) + + reward = 1.0 if correct else 0.0 + acc = correct + + return { + "score": reward, + "acc": acc, + "pred": pred, + } diff --git a/nemo_rl/environments/games/sliding_puzzle.py b/nemo_rl/environments/games/sliding_puzzle.py index 9ff521a182..659318b858 100644 --- a/nemo_rl/environments/games/sliding_puzzle.py +++ b/nemo_rl/environments/games/sliding_puzzle.py @@ -272,6 +272,7 @@ def process_turn( bool, Optional[list[str]], Optional[SlidingPuzzleMetadata], + Optional[list[str]], ]: """Processes a single turn for the sliding puzzle task.""" game_state = metadata["game_state"] @@ -297,6 +298,7 @@ def process_turn( is_terminated, None, next_metadata, + None, ) # Get last assistant message and parse action @@ -328,13 +330,15 @@ def process_turn( if is_terminated: next_metadata 
= None # Clear metadata on termination - + # answers save the extracted answer, only assigned in the verify function + next_answers = None return ( {"role": "environment", "content": next_observation_content + "\n"}, turn_reward, is_terminated, next_stop_strings, next_metadata, + next_answers, ) @@ -365,13 +369,15 @@ def step( terminateds = [] all_stop_strings = [] all_next_metadata = [] + all_answers = [] - for obs, rew, term, stops, meta in results: + for obs, rew, term, stops, meta, answ in results: observations.append(obs) rewards.append(rew) terminateds.append(term) all_stop_strings.append(stops) all_next_metadata.append(meta) + all_answers.append(answ) rewards_tensor = torch.tensor(rewards, dtype=torch.float32) terminated_tensor = torch.tensor(terminateds, dtype=torch.bool) @@ -382,6 +388,7 @@ def step( next_stop_strings=all_stop_strings, rewards=rewards_tensor, terminateds=terminated_tensor, + answers=all_answers, ) def shutdown(self): diff --git a/nemo_rl/environments/interfaces.py b/nemo_rl/environments/interfaces.py index 1d62900964..b869c32df7 100644 --- a/nemo_rl/environments/interfaces.py +++ b/nemo_rl/environments/interfaces.py @@ -38,6 +38,7 @@ class EnvironmentReturn(NamedTuple, Generic[MetadataT]): similar. This field lets you control this per turn. rewards: the rewards for this turn. terminateds: whether the episode ended this turn. + answers: the answers for this turn. 
""" observations: list[dict[str, str]] @@ -45,6 +46,7 @@ class EnvironmentReturn(NamedTuple, Generic[MetadataT]): next_stop_strings: list[list[str] | None] | list[None] rewards: Tensor terminateds: Tensor + answers: list[str | None] | None class EnvironmentInterface(abc.ABC, Generic[MetadataT]): diff --git a/nemo_rl/environments/math_environment.py b/nemo_rl/environments/math_environment.py index cafc7d90f7..8de2da805a 100644 --- a/nemo_rl/environments/math_environment.py +++ b/nemo_rl/environments/math_environment.py @@ -15,10 +15,11 @@ import io import logging import re -from typing import Any, Optional, TypedDict +from typing import Any, NotRequired, TypedDict, Union import ray import torch +from math_verify import grader from math_verify.errors import TimeoutException from math_verify.metric import math_metric from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig @@ -26,6 +27,7 @@ from nemo_rl.data.interfaces import LLMMessageLogType from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.environments.dapo_math_verifier import compute_score as dapo_math_verify from nemo_rl.environments.interfaces import ( EnvironmentInterface, EnvironmentReturn, @@ -39,8 +41,10 @@ class MathEnvConfig(TypedDict): num_workers: int - stop_strings: Optional[list[str]] # Default stop strings for this env - verifier_type: Optional[str] + stop_strings: NotRequired[list[str] | None] # Default stop strings for this env + # The verifier type. None defaults to "math". 
+ verifier_type: NotRequired[str | None] + math_verify_impl: NotRequired[str | None] @contextlib.contextmanager @@ -69,8 +73,12 @@ def __init__(self) -> None: ) def verify( - self, pred_responses: list[str], ground_truths: list[str] - ) -> list[float]: + self, + pred_responses: list[str], + ground_truths: list[str], + return_extracted_answer: bool = False, + **kwargs, + ) -> Union[list[float], tuple[list[float], list[str | None]]]: """Verify the correctness of the predicted responses against the ground truth. Args: @@ -78,34 +86,70 @@ def verify( ground_truths: list[str]. The ground truth responses. Returns: - list[float]. The rewards for each predicted response. + Union[list[float], tuple[list[float], list[str | None]]]. + If return_extracted_answer is False, returns only the scores. + If return_extracted_answer is True, returns (scores, extracted_answers). """ results = [] + extracted_answers: list[str | None] = [] + for response, ground_truth in zip(pred_responses, ground_truths): try: - ground_truth_parsable = "\\boxed{" + ground_truth + "}" with _mute_output(): - try: - ret_score, _ = self.verify_func( + math_verify_impl = kwargs.get("math_verify_impl", "hf_math_verify") + if kwargs.get("math_verify_impl") == "dapo_math_verify": + # This compute_score is from the DAPO Math Verifier from Verl + reward_dict = dapo_math_verify(response, ground_truth) + ret_score = reward_dict["score"] + extracted_answer = reward_dict["pred"] + elif kwargs.get("math_verify_impl") == "hf_math_verify": + ground_truth_parsable = "\\boxed{" + ground_truth + "}" + ret_score, extracted_answer = self.verify_func( [ground_truth_parsable], [response] ) - # It's possible to emit a TimeoutException and that wouldn't be caught since - # it actually subclasses from BaseException and math-verify itself does not - # to catch it. - except (Exception, TimeoutException): - ret_score = 0.0 + else: + raise ValueError( + f"Unknown math_verify_impl: {math_verify_impl}. 
Expected 'hf_math_verify' or 'dapo_math_verify'." + ) results.append(float(ret_score)) - except Exception: + + if return_extracted_answer: + # Make sure the extracted answer is not None and is a list of two elements + assert extracted_answer is not None + assert len(extracted_answer) == 2 + extracted_gold, extracted_prediction = extracted_answer + # Get the extracted answer with the same logic as in the HFVerifyWorker + for pred in extracted_prediction: + if any(grader.verify(gold, pred) for gold in extracted_gold): + extracted_answers.append(pred) + break + else: + # If no match is found, means all answers are incorrect, just use the first prediction + extracted_answers.append(extracted_prediction[0][0]) + + # It's possible to emit a TimeoutException and that wouldn't be caught since + # it actually subclasses from BaseException and math-verify itself does not + # to catch it. + except (Exception, TimeoutException): results.append(0.0) - return results + extracted_answers.append(None) + + if return_extracted_answer: + return results, extracted_answers + else: + return results @ray.remote # pragma: no cover class MultilingualMultichoiceVerifyWorker: def verify( - self, pred_responses: list[str], ground_truths: list[str] - ) -> list[float]: + self, + pred_responses: list[str], + ground_truths: list[str], + return_extracted_answer: bool = False, + **kwargs, + ) -> Union[list[float], tuple[list[float], list[str | None]]]: """Verify the correctness of the predicted responses against the ground truth. Args: @@ -113,9 +157,13 @@ def verify( ground_truths: list[str]. The ground truth responses. Returns: - list[float]. The rewards for each predicted response. + Union[list[float], tuple[list[float], list[str | None]]]. + If return_extracted_answer is False, returns only the scores. + If return_extracted_answer is True, returns (scores, extracted_answers). 
""" results = [] + extracted_answers: list[str | None] = [] + for response, ground_truth in zip(pred_responses, ground_truths): response = answer_parsing.normalize_response(response) extracted_answer = None @@ -131,14 +179,23 @@ def verify( break score = 1.0 if extracted_answer == ground_truth else 0.0 results.append(score) - return results + extracted_answers.append(extracted_answer) + + if return_extracted_answer: + return results, extracted_answers + else: + return results @ray.remote # pragma: no cover class EnglishMultichoiceVerifyWorker: def verify( - self, pred_responses: list[str], ground_truths: list[str] - ) -> list[float]: + self, + pred_responses: list[str], + ground_truths: list[str], + return_extracted_answer: bool = False, + **kwargs, + ) -> Union[list[float], tuple[list[float], list[str | None]]]: """Verify the correctness of the predicted responses against the ground truth. Args: @@ -146,9 +203,13 @@ def verify( ground_truths: list[str]. The ground truth responses. Returns: - list[float]. The rewards for each predicted response. + Union[list[float], tuple[list[float], list[str | None]]]. + If return_extracted_answer is False, returns only the scores. + If return_extracted_answer is True, returns (scores, extracted_answers). 
""" results = [] + extracted_answers: list[str | None] = [] + for response, ground_truth in zip(pred_responses, ground_truths): ground_truth = answer_parsing.normalize_response(ground_truth) response = answer_parsing.normalize_response(response) @@ -160,11 +221,18 @@ def verify( ) score = 1.0 if extracted_answer == ground_truth else 0.0 results.append(score) - return results + if return_extracted_answer: + extracted_answers.append(extracted_answer) + + if return_extracted_answer: + return results, extracted_answers + else: + return results class MathEnvironmentMetadata(TypedDict): ground_truth: str + extracted_answer: str | None @ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover @@ -177,6 +245,7 @@ def __init__(self, cfg: MathEnvConfig): assert isinstance(verifier_type, str), ( f"{verifier_type=} must be a string but was {type(verifier_type)}" ) + worker_cls = { "math": HFVerifyWorker, "english_multichoice": EnglishMultichoiceVerifyWorker, @@ -198,12 +267,13 @@ def step( self, message_log_batch: list[LLMMessageLogType], metadata: list[MathEnvironmentMetadata], + return_extracted_answer: bool = False, ) -> EnvironmentReturn[MathEnvironmentMetadata]: """Runs a step in the math environment. Args: message_log: list[list[dict[str, str]]]. A batch of OpenAI-API-like message logs that represent interactions with the LLM. - metadata: list[MathEnvironmentMetadata]. The grader will use the 'ground_truth' key to evaluate correctness. + metadata: list[MathEnvironmentMetadata]. The grader will use the 'ground_truth' key to evaluate correctness. The extracted answer will be stored to caculate cons@k. 
Returns: EnvironmentReturn: A tuple containing: @@ -231,18 +301,35 @@ def step( ) chunked_ground_truths = chunk_list_to_workers(ground_truths, self.num_workers) - # # Process each chunk in parallel + # Process each chunk in parallel futures = [ - self.workers[i].verify.remote(chunk, ground_truth_chunk) + self.workers[i].verify.remote( + chunk, + ground_truth_chunk, + return_extracted_answer, + math_verify_impl=self.cfg.get("math_verify_impl", "hf_math_verify"), + ) for i, (chunk, ground_truth_chunk) in enumerate( zip(chunked_assistant_response_batch, chunked_ground_truths) ) ] - results = ray.get(futures) + worker_results = ray.get(futures) + + # Flatten the results and extract both scores and answers + results = [] + extracted_answers: list[str | None] | None = ( + [] if return_extracted_answer else None + ) + + for worker_result in worker_results: + if return_extracted_answer: + worker_scores, worker_answers = worker_result + results.extend(worker_scores) + extracted_answers.extend(worker_answers) + else: + results.extend(worker_result) - # flatten the results - results = [item for sublist in results for item in sublist] observations = [ { "role": "environment", @@ -256,7 +343,6 @@ def step( # create a tensor of rewards and done flags rewards = torch.tensor(results).cpu() done = torch.ones_like(rewards).cpu() - next_stop_strings = [None] * len(message_log_batch) return EnvironmentReturn( @@ -265,6 +351,7 @@ def step( next_stop_strings=next_stop_strings, rewards=rewards, terminateds=done, + answers=extracted_answers, ) def global_post_process_and_metrics( diff --git a/nemo_rl/environments/penguin.py b/nemo_rl/environments/penguin.py new file mode 100644 index 0000000000..1f7462a866 --- /dev/null +++ b/nemo_rl/environments/penguin.py @@ -0,0 +1,248 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from pathlib import Path +from typing import Any, Dict, List, TypedDict + +import ray +import torch +from transformers import PreTrainedTokenizerBase + +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.virtual_cluster import _get_free_port_local, _get_node_ip_local +from nemo_rl.environments.interfaces import EnvironmentInterface +from nemo_rl.utils.timer import Timer + + +class PenguinConfig(TypedDict): + model_name: str + base_urls: List[str] + initial_global_config_dict: Dict[str, Any] + + +@ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover +class Penguin(EnvironmentInterface): + """This environment class isn't really used for training. It's really meant as an integration wrapper around Penguin that hooks into the existing NeMo RL resource management via ray. 
So there is still one source of truth for resource management in NeMo RL.""" + + def __init__(self, cfg: PenguinConfig): + self.cfg = cfg + + self.node_ip = _get_node_ip_local() + self.head_server_port = _get_free_port_local() + + from omegaconf import DictConfig + from penguin.cli import GlobalConfigDictParserConfig, RunHelper + from penguin.rollout_collection import RolloutCollectionHelper + from penguin.server_utils import HEAD_SERVER_KEY_NAME, BaseServerConfig + + RELATIVE_PATH = "nemo_rl/environments/penguin.py" + assert __file__.endswith(RELATIVE_PATH) + + initial_global_config_dict = ( + self.cfg.get("initial_global_config_dict") or dict() + ) + # Policy information + initial_global_config_dict["policy_model_name"] = self.cfg["model_name"] + initial_global_config_dict["policy_api_key"] = ( + "dummy_key" # No key necessary for training. + ) + initial_global_config_dict["policy_base_url"] = self.cfg["base_urls"] + + initial_global_config_dict.setdefault( + "global_aiohttp_connector_limit_per_host", 16_384 + ) + initial_global_config_dict.setdefault("global_aiohttp_connector_limit", 65_536) + print( + f"""Set global_aiohttp_connector_limit_per_host={initial_global_config_dict["global_aiohttp_connector_limit_per_host"]} and global_aiohttp_connector_limit={initial_global_config_dict["global_aiohttp_connector_limit"]}. 
+Depending on your data shape, you may want to change these values.""" + ) + + # Get Ray head node address if Ray is initialized + assert ray.is_initialized(), ( + "Ray must be initialized before using Penguin environment" + ) + ray_context = ray.get_runtime_context() + assert ray_context.gcs_address, "Ray must have a GCS address" + + initial_global_config_dict["ray_head_node_address"] = ray_context.gcs_address + print(f"Ray head node address: {ray_context.gcs_address}") + + # Head server + initial_global_config_dict[HEAD_SERVER_KEY_NAME] = { + "host": "0.0.0.0", + "port": self.head_server_port, + } + + self.rh = RunHelper() + self.rh.start( + global_config_dict_parser_config=GlobalConfigDictParserConfig( + dotenv_path=Path(__file__.removesuffix(RELATIVE_PATH)).absolute() + / "penguin_env.yaml", + initial_global_config_dict=DictConfig(initial_global_config_dict), + skip_load_from_cli=True, + ) + ) + + # Setup for rollout collection + self.head_server_config = BaseServerConfig( + host=self.node_ip, + port=self.head_server_port, + ) + self.rch = RolloutCollectionHelper() + + def health_check(self) -> bool: + return True + + async def run_rollouts( + self, + penguin_examples: list[dict], + tokenizer: PreTrainedTokenizerBase, + timer_prefix: str, + ) -> list[dict]: + timer = Timer() + + penguin_result_iterator = self.rch.run_examples( + examples=penguin_examples, head_server_config=self.head_server_config + ) + + timer.start("_run_rollouts_total") + nemo_rl_results = [] + for task in penguin_result_iterator: + with timer.time(label=f"{timer_prefix}/await_results"): + penguin_result = await task + + with timer.time(label=f"{timer_prefix}/postprocess_results"): + nemo_rl_result = self._postprocess_penguin_to_nemo_rl_result( + penguin_result, tokenizer + ) + + nemo_rl_results.append(nemo_rl_result) + + timer.stop("_run_rollouts_total") + timing_metrics = timer.get_timing_metrics("sum") + total_time = timing_metrics.pop("_run_rollouts_total") + 
timing_metrics[f"{timer_prefix}/postprocess_results_pct"] = ( + 100 * timing_metrics[f"{timer_prefix}/postprocess_results"] / total_time + ) + + return nemo_rl_results, timing_metrics + + def _postprocess_penguin_to_nemo_rl_result( + self, penguin_result: dict, tokenizer: PreTrainedTokenizerBase + ) -> dict: + nemo_rl_message_log = [] + seen_token_ids: List[int] = [] + for output_item_dict in penguin_result["response"]["output"]: + # Nemo RL really only has two types of messages: assistant and not assistant since that is all that it is concerned with (i.e. to train or not to train) + # Here we map all the trainable messages to assistant and all the non-trainable messages to user. + # Eventually we can maybe be smarter about this, but this is functional for now. + + # Note that Penguin will only return token ids on "assistant" messages and not other message types. + if "generation_token_ids" not in output_item_dict: + continue + + assert ( + seen_token_ids + == output_item_dict["prompt_token_ids"][: len(seen_token_ids)] + ), f"""Non-contiguous messages found! This may be a tokenization issue where certain tokens are combined when messages are concatenated, or it may be due to part of the chat history being truncated (like if super long history is truncated or if reasoning is stripped out). 
+Seen token IDs: {seen_token_ids} +Output prompt token IDs: {output_item_dict["prompt_token_ids"]} +""" + + nemo_rl_message_log.append( + { + "role": "user", + "content": "", + "token_ids": torch.tensor( + output_item_dict["prompt_token_ids"][len(seen_token_ids) :] + ), + } + ) + nemo_rl_message_log.append( + { + "role": "assistant", + "content": "", + "token_ids": torch.tensor(output_item_dict["generation_token_ids"]), + "generation_logprobs": torch.tensor( + output_item_dict["generation_log_probs"] + ), + } + ) + + seen_token_ids.extend(nemo_rl_message_log[-2]["token_ids"]) + seen_token_ids.extend(nemo_rl_message_log[-1]["token_ids"]) + + # We pop to remove larger tensors from logging. + output_item_dict["prompt_str"] = tokenizer.decode( + output_item_dict.pop("prompt_token_ids") + ) + output_item_dict["generation_str"] = tokenizer.decode( + output_item_dict.pop("generation_token_ids") + ) + output_item_dict.pop("generation_log_probs") + + return { + "message_log": nemo_rl_message_log, + "input_message_log": nemo_rl_message_log[:1], + "full_result": penguin_result, + } + + def shutdown(self) -> None: + self.rh.shutdown() + + def step(self, message_log_batch, metadata): + # This is not used since Penguin will handle the rollouts entirely. + raise NotImplementedError + + def global_post_process_and_metrics(self, batch): + # Similar to the step function, this is not used. + raise NotImplementedError + + +######################################## +# Global config utils +######################################## + + +def setup_penguin_config(config, tokenizer) -> None: + generation_config = config["policy"]["generation"] + + # Enable the http server. 
Requires both async engine and the expose_http_server flag + generation_config["vllm_cfg"]["async_engine"] = True + generation_config["vllm_cfg"]["expose_http_server"] = True + + # Stop strings or token ids are not supported + generation_config["stop_strings"] = None + generation_config["stop_token_ids"] = None + + +######################################## +# Data utils +######################################## + + +# We do some light preprocessing here to make our data format compatible with nemo rl format +def penguin_example_to_nemo_rl_datum_spec(penguin_example: dict, idx: int) -> DatumSpec: + return DatumSpec( + message_log=[ + {"role": "user", "content": "", "token_ids": torch.tensor([])} + ], # Fake message + length=0, + extra_env_info=penguin_example, + loss_multiplier=1.0, # Fix to 1.0 to backprop on all examples + idx=idx, + task_name="penguin", + stop_strings=None, + # Extra vars + token_ids=[], # Just need this empty key to be compatible with the current NeMo RL GRPO impl + ) diff --git a/nemo_rl/environments/reward_model_environment.py b/nemo_rl/environments/reward_model_environment.py new file mode 100644 index 0000000000..eee7af9a16 --- /dev/null +++ b/nemo_rl/environments/reward_model_environment.py @@ -0,0 +1,366 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import os
from typing import Any, Dict, List, NotRequired, Optional, Tuple, TypedDict

import ray
import torch

from nemo_rl.algorithms.utils import get_tokenizer
from nemo_rl.data.interfaces import LLMMessageLogType, TaskDataSpec
from nemo_rl.data.llm_message_utils import (
    batched_message_log_to_flat_message,
    get_formatted_message_log,
)
from nemo_rl.distributed.batched_data_dict import BatchedDataDict
from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES, RayVirtualCluster
from nemo_rl.environments.interfaces import EnvironmentInterface, EnvironmentReturn
from nemo_rl.models.generation.interfaces import GenerationDatumSpec
from nemo_rl.models.generation.vllm import VllmConfig
from nemo_rl.models.policy import DynamicBatchingConfig, SequencePackingConfig
from nemo_rl.models.policy.lm_policy import Policy


class RewardModelEnvironmentConfig(TypedDict):
    """Configuration schema for RewardModelEnvironment.

    PEP 589 forbids assigning default values inside a TypedDict body, so
    optional sections are declared ``NotRequired`` here and defaulted at
    runtime in ``RewardModelEnvironment.__init__``.

    Attributes:
        enabled: Whether the reward model environment is enabled.
        model_name: Name of the reward model to use
            (e.g. "Skywork/Skywork-Reward-V2-Qwen3-0.6B").
        tokenizer: Tokenizer configuration consumed by ``get_tokenizer``.
        precision: Model precision (e.g. "bfloat16", "float16", "float32").
        batch_size: Batch size for processing conversations.
        checkpoint_path: Path to a model checkpoint (may be None).
        logprob_batch_size: Batch size for log-probability computation.
        resources: Resource allocation; must contain "gpus_per_node" and
            "num_nodes".
        reward_model_cfg: Reward model settings; must have ``enabled=True``
            and ``reward_model_type="bradley_terry"``.
        dtensor_cfg: DTensor settings; must be enabled, with cpu_offload and
            activation_checkpointing disabled.
        dynamic_batching: Must be disabled; defaults to ``{"enabled": False}``.
        sequence_packing: Must be disabled; defaults to ``{"enabled": False}``.
        max_grad_norm: Must be None (this environment never trains).
        generation: Optional vLLM generation configuration.
    """

    enabled: bool
    model_name: str
    tokenizer: Dict[str, Any]
    precision: str
    batch_size: int
    checkpoint_path: str
    logprob_batch_size: int
    resources: Dict[str, Any]
    reward_model_cfg: Dict[str, Any]
    dtensor_cfg: Optional[Dict[str, Any]]
    dynamic_batching: NotRequired[DynamicBatchingConfig]
    sequence_packing: NotRequired[SequencePackingConfig]
    max_grad_norm: NotRequired[Optional[float]]
    generation: NotRequired[Optional[VllmConfig]]


@ray.remote
class RewardModelEnvironment(EnvironmentInterface):
    """Environment that uses a reward model to score conversations.

    Takes conversation message logs as input and returns per-conversation
    rewards produced by a pre-trained Bradley-Terry reward model hosted on a
    dedicated Ray virtual cluster.

    Attributes:
        config: Configuration dictionary containing all environment settings.
        virtual_cluster: Ray virtual cluster for resource management.
        tokenizer: Tokenizer for text processing.
        reward_model_policy: Policy object wrapping the reward model.
    """

    DEFAULT_PY_EXECUTABLE = PY_EXECUTABLES.BASE

    def __init__(self, config: Dict[str, Any]):
        """Initialize the reward model environment.

        Args:
            config: Configuration following RewardModelEnvironmentConfig.
                Must include model_name, tokenizer, resources,
                reward_model_cfg and dtensor_cfg.
        """
        print("🚀 REWARD MODEL ENVIRONMENT INITIALIZATION STARTED")
        print("=" * 60)
        print(f"📋 Received config: {config}")

        self.config = config
        # Normalize before validating so a missing section produces the
        # intended AssertionError rather than an opaque KeyError.
        self._normalize_config()
        self._validate_config()

        self.task_data_spec = TaskDataSpec(
            task_name="reward_model_env",
        )

        # Remove CUDA_VISIBLE_DEVICES to let ray fully control GPU allocation.
        os.environ.pop("CUDA_VISIBLE_DEVICES", None)
        self.virtual_cluster = RayVirtualCluster(
            name="grpo_reward_model_cluster",
            bundle_ct_per_node_list=[self.config["resources"]["gpus_per_node"]]
            * self.config["resources"]["num_nodes"],
            use_gpus=True,
            num_gpus_per_node=self.config["resources"]["gpus_per_node"],
            max_colocated_worker_groups=1,
        )
        print(
            f"🔧 Virtual cluster created with {self.virtual_cluster.get_placement_groups()} "
        )
        print("🔧 Setting up reward model worker...")
        weights_path = self.config.get("checkpoint_path", None)
        self.tokenizer = get_tokenizer(self.config["tokenizer"])
        print(
            f"✅ Tokenizer initialized with pad_token_id: {self.tokenizer.pad_token_id}"
        )
        # No optimizer / reference model: the reward model is inference-only.
        self.reward_model_policy = Policy(
            cluster=self.virtual_cluster,
            config=self.config,
            tokenizer=self.tokenizer,
            name_prefix="reward_model_policy",
            init_optimizer=False,
            init_reference_model=False,
            weights_path=weights_path,
        )

        print("✅ REWARD MODEL ENVIRONMENT INITIALIZATION COMPLETE")

    def _normalize_config(self) -> None:
        """Fill in the config sections this environment mandates.

        Optional sections get explicit defaults; sections that the asserts in
        ``_validate_config`` require to carry specific values are defaulted to
        failing values so misconfiguration surfaces as a clear message.
        """
        rm_cfg = self.config.setdefault("reward_model_cfg", {})
        rm_cfg.setdefault("enabled", False)
        rm_cfg.setdefault("reward_model_type", "")
        self.config.setdefault("dynamic_batching", {}).setdefault("enabled", False)
        self.config.setdefault("sequence_packing", {}).setdefault("enabled", False)
        self.config.setdefault("max_grad_norm", None)
        if self.config.get("dtensor_cfg") is None:
            self.config["dtensor_cfg"] = {}
        dtensor_cfg = self.config["dtensor_cfg"]
        dtensor_cfg.setdefault("enabled", False)
        dtensor_cfg.setdefault("cpu_offload", False)
        dtensor_cfg.setdefault("activation_checkpointing", False)

    def _validate_config(self) -> None:
        """Assert the feature combination this environment supports."""
        assert self.config["reward_model_cfg"]["enabled"], (
            "Please set reward_model_cfg.enabled = True in the reward model environment config to enable reward model."
        )
        assert (
            self.config["reward_model_cfg"]["reward_model_type"] == "bradley_terry"
        ), (
            "Reward model environment currently only support with Bradley-Terry reward model."
        )
        assert not self.config["dynamic_batching"]["enabled"], (
            "Dynamic batching is currently not supported with reward model environment."
        )
        assert not self.config["sequence_packing"]["enabled"], (
            "Sequence packing is currently not supported with reward model environment."
        )
        assert self.config["dtensor_cfg"]["enabled"], (
            "Reward model environment currently only support with DTensor. You can show your interest in mcore path by upvoting on https://github.com/NVIDIA-NeMo/RL/issues/1154"
        )
        assert self.config["max_grad_norm"] is None, (
            "Max grad norm must be None in reward model environment."
        )
        assert not self.config["dtensor_cfg"]["cpu_offload"], (
            "CPU offload is currently not supported with reward model environment."
        )
        assert not self.config["dtensor_cfg"]["activation_checkpointing"], (
            "Activation checkpointing is currently not supported with reward model environment."
        )

    def preprocess_data(
        self, message_logs: List[LLMMessageLogType]
    ) -> BatchedDataDict[GenerationDatumSpec]:
        """Tokenize and batch message logs for reward model inference.

        Applies the chat template (with BOS/EOS, no generation prompt) to each
        conversation, then flattens and pads the batch.

        Args:
            message_logs: Conversation logs; each is a list of messages with
                'role' and 'content' fields.

        Returns:
            BatchedDataDict with "input_ids" and "input_lengths" ready for
            the reward model worker.
        """
        tokenized_message_logs = []
        for message_log in message_logs:
            tokenized_log = get_formatted_message_log(
                message_log,
                tokenizer=self.tokenizer,
                task_data_spec=self.task_data_spec,
                add_bos_token=True,
                add_eos_token=True,
                add_generation_prompt=False,
            )
            tokenized_message_logs.append(tokenized_log)

        # Flatten each conversation to a single token sequence and pad.
        cat_and_padded, input_lengths = batched_message_log_to_flat_message(
            tokenized_message_logs,
            pad_value_dict={"token_ids": self.tokenizer.pad_token_id},
        )

        reward_data = BatchedDataDict[GenerationDatumSpec](
            {
                "input_ids": cat_and_padded["token_ids"],
                "input_lengths": input_lengths,
            }
        )
        return reward_data

    def step(
        self,
        message_logs: List[LLMMessageLogType],
        env_infos: List[Dict[str, Any]],
    ) -> EnvironmentReturn:
        """Score the given message logs with the reward model.

        Args:
            message_logs: Conversation logs to be scored.
            env_infos: Environment info dicts (unused; required by interface).

        Returns:
            EnvironmentReturn with one reward per conversation; every episode
            terminates after this single step.
        """
        reward_data = self.preprocess_data(message_logs)

        rewards = self.reward_model_policy.score(reward_data)["scores"]

        # Observations mirror the math environment's "Environment: <value>"
        # convention.
        observations = []
        for i, reward in enumerate(rewards):
            content = "Environment: " + str(reward)
            observations.append({"role": "environment", "content": content})

        # All episodes terminate after one step in reward model environment.
        terminateds = [True] * len(message_logs)
        metadata = [None] * len(message_logs)
        next_stop_strings = [None] * len(message_logs)

        # The last message of each conversation is taken as the answer.
        answers = [message_log[-1]["content"] for message_log in message_logs]

        return EnvironmentReturn(
            observations=observations,
            metadata=metadata,
            next_stop_strings=next_stop_strings,
            rewards=rewards.cpu(),
            terminateds=torch.tensor(terminateds, dtype=torch.bool).cpu(),
            answers=answers,
        )

    def global_post_process_and_metrics(
        self, batch: BatchedDataDict
    ) -> Tuple[BatchedDataDict, dict]:
        """Return the batch unchanged plus aggregate reward statistics.

        Args:
            batch: Batch containing processed conversations and rewards.

        Returns:
            Tuple of (batch, metrics). Metrics include sample count and — when
            a "rewards" tensor is present — mean/std/min/max reward.
        """
        metrics = {
            "reward_model_env/num_samples": len(batch.get("message_log", [])),
        }

        if "rewards" in batch:
            rewards = batch["rewards"]
            if isinstance(rewards, torch.Tensor):
                metrics.update(
                    {
                        "reward_model_env/mean_reward": float(rewards.mean()),
                        "reward_model_env/std_reward": float(rewards.std()),
                        "reward_model_env/min_reward": float(rewards.min()),
                        "reward_model_env/max_reward": float(rewards.max()),
                    }
                )

        return batch, metrics

    def shutdown(self):
        """Shut down the reward model policy and the virtual cluster.

        Safe to call multiple times and on partially-constructed instances
        (e.g. when __del__ fires after __init__ raised part-way through).
        Call explicitly when the environment is no longer needed.
        """
        if getattr(self, "reward_model_policy", None) is not None:
            try:
                self.reward_model_policy.shutdown()
            except Exception as e:
                print(f"Warning: Error shutting down reward model policy: {e}")
            self.reward_model_policy = None
        if getattr(self, "virtual_cluster", None) is not None:
            try:
                self.virtual_cluster.shutdown()
            except Exception as e:
                print(f"Warning: Error shutting down virtual cluster: {e}")
            self.virtual_cluster = None

    def __del__(self):
        """Safety net: release resources if shutdown() was never called.

        Explicit shutdown() remains the recommended path; destructor timing is
        not guaranteed by the interpreter.
        """
        self.shutdown()
import re
from typing import Callable, Optional

import numpy as np

# The math_verify verifier is built lazily (see _get_math_verify_func) so that
# importing this module for the format / exact-match / bbox rewards does not
# require the optional math_verify dependency.
_math_verify_func = None


def _get_math_verify_func():
    """Build (once) and return the cached math_verify comparison callable.

    Raises:
        ImportError: if the optional ``math_verify`` package is not installed.
    """
    global _math_verify_func
    if _math_verify_func is None:
        from math_verify.metric import math_metric
        from math_verify.parser import ExprExtractionConfig, LatexExtractionConfig

        _math_verify_func = math_metric(
            gold_extraction_target=(LatexExtractionConfig(),),
            pred_extraction_target=(
                ExprExtractionConfig(),
                LatexExtractionConfig(),
            ),
        )
    return _math_verify_func


def boxed(x: str) -> str:
    """Wrap ``x`` in ``\\boxed{...}`` unless it is already wrapped."""
    return x if x.startswith("\\boxed{") else "\\boxed{" + x + "}"


def math_expression_reward(
    ground_truth: str, response: str, tag: str = "answer"
) -> tuple[float, bool]:
    """Reward when the answer inside <{tag}> tags equals the ground-truth expression.

    The `tag` is customizable and must be specified as part of the user COT
    prompt text file.

    Returns:
        (score, is_correct) — is_correct is True when the math_verify score
        exceeds 0.1; (0.0, False) when the tag is absent or verification fails.
    """
    match = re.search(rf"<{tag}>([\s\S]*)</{tag}>", response)
    if match is None:
        return 0.0, False
    answer = match.group(1)

    from math_verify.errors import TimeoutException

    try:
        score, _ = _get_math_verify_func()([boxed(ground_truth)], [boxed(answer)])
        return float(score), score > 0.1
    except (Exception, TimeoutException):
        # math_verify raises on unparsable / timed-out expressions; treat as
        # incorrect rather than propagating.
        return 0.0, False


def format_reward(
    ground_truth: str,
    response: str,
    think_tag: str = "think",
    answer_tag: str = "answer",
) -> tuple[float, Optional[bool]]:
    """Reward responses following the (.*) <think>...</think> <answer>...</answer> format.

    The `think_tag` and `answer_tag` are customizable and must be specified as
    part of the user COT prompt text file.

    Returns:
        (reward, None) — correctness is None because formatting alone says
        nothing about whether the answer is right.
    """
    rew = 0.0
    if re.search(rf"<{think_tag}>[\s\S]*</{think_tag}>", response):
        rew += 0.25  # 0.25 points for having think tags
    if re.search(rf"<{answer_tag}>[\s\S]*</{answer_tag}>", response):
        rew += 0.75  # 0.75 points for having answer tags
    return rew, None


def exact_answer_alphanumeric_reward(
    ground_truth: str, response: str, answer_tag: str = "answer"
) -> tuple[float, bool]:
    """Reward when the <{answer_tag}> content matches ground truth (case-insensitive).

    All non-alphanumeric characters (whitespace, punctuation, etc.) are
    stripped from both sides before comparing.
    """
    match = re.search(rf"<{answer_tag}>([\s\S]*)</{answer_tag}>", response)
    if match:
        answer = match.group(1)
        answer_clean = "".join(c for c in answer if c.isalnum()).lower()
        ground_truth_clean = "".join(c for c in ground_truth if c.isalnum()).lower()
        if answer_clean == ground_truth_clean:
            return 1.0, True
    return 0.0, False


def bbox_giou_reward(
    ground_truth: str,
    response: str,
    giou_penalty_thres: float = 10.0,
    answer_tag: str = "answer",
) -> tuple[float, bool]:
    """Compute GIoU between [x1, y1, x2, y2] boxes in the <{answer_tag}> tags and ground truth.

    NOTE(review): with the default ``giou_penalty_thres=10.0`` the GIoU branch
    is always taken (IoU of valid boxes is <= 1) — confirm the default is
    intended to enable GIoU unconditionally.

    Returns:
        (giou, giou > 0.5); (0.0, False) when the tag is missing or either box
        fails to parse as four comma-separated floats.
    """
    match = re.search(rf"<{answer_tag}>([\s\S]*)</{answer_tag}>", response)
    if match:
        answer = match.group(1)
    else:
        return 0.0, False

    try:
        x1g, y1g, x2g, y2g = [
            float(x) for x in ground_truth.replace("[", "").replace("]", "").split(",")
        ]
        x1r, y1r, x2r, y2r = [
            float(x) for x in answer.replace("[", "").replace("]", "").split(",")
        ]
    except ValueError:
        return 0.0, False

    # Areas of the ground-truth and response boxes.
    area_g = (x2g - x1g) * (y2g - y1g)
    area_r = (x2r - x1r) * (y2r - y1r)
    # Intersection rectangle (clamped to zero when boxes are disjoint).
    x1i = max(x1g, x1r)
    y1i = max(y1g, y1r)
    x2i = min(x2g, x2r)
    y2i = min(y2g, y2r)
    area_i = max(0.0, x2i - x1i) * max(0.0, y2i - y1i)
    # Union area, floored at 1e-3 to avoid division by zero.
    area_u = max(1e-3, area_g + area_r - area_i)
    iou = area_i / area_u
    # If iou is below the threshold, apply the GIoU convex-hull penalty.
    if iou < giou_penalty_thres:
        x1c = min(x1g, x1r)
        y1c = min(y1g, y1r)
        x2c = max(x2g, x2r)
        y2c = max(y2g, y2r)
        area_c = max(1e-3, (x2c - x1c) * (y2c - y1c))
        giou = iou - (area_c - area_u) / area_c
    else:
        giou = iou
    return giou, giou > 0.5


def combine_reward_functions(
    reward_functions: list[tuple[Callable[[str, str], tuple[float, bool]], float]],
) -> Callable[[str, str], tuple[float, bool]]:
    """Combine several weighted reward functions into one callable.

    Weights are renormalized to sum to 1. Reward functions whose correctness
    flag is None (e.g. format_reward) contribute to the score but are skipped
    when deciding overall correctness.

    Args:
        reward_functions: list of (reward_function, weight) pairs; typically
            resolved from the YAML config by the VLMEnvironment class.

    Returns:
        A callable (ground_truth, response) -> (weighted_score, is_correct).
    """
    weights = np.array([weight for _, weight in reward_functions])
    weights = weights / np.sum(weights)  # renormalize weights to 1

    def combined_reward_func(ground_truth: str, response: str) -> tuple[float, bool]:
        outcomes = [func(ground_truth, response) for func, _ in reward_functions]
        rewards = np.array([score for score, _ in outcomes])
        # None flags do not contribute to correctness (the answer can still be
        # correct without, say, <think> tags).
        is_correct = all(flag for _, flag in outcomes if flag is not None)
        # Cast to a plain float so the annotated return type holds.
        return float(np.sum(rewards * weights)), is_correct

    return combined_reward_func
import math
import re
from collections import Counter
from typing import Any, Dict, List, Optional, TypedDict

import ray
import torch
from datasets import load_dataset
from tqdm import tqdm
from transformers import AutoTokenizer

from nemo_rl.data.interfaces import LLMMessageLogType
from nemo_rl.distributed.batched_data_dict import BatchedDataDict
from nemo_rl.environments.interfaces import EnvironmentInterface, EnvironmentReturn


class RAGEnvConfig(TypedDict):
    dataset_name: str  # Name of the dataset to load
    dataset_split: str  # Split of the dataset to use
    text_column: str  # Column name containing the text to retrieve
    num_results: int  # Number of documents to retrieve
    k1: float  # BM25 term-frequency saturation parameter
    b: float  # BM25 length-normalization parameter
    device: str  # Device to compute BM25


class BM25Retriever:
    """Sparse Okapi BM25 retriever over an in-memory corpus.

    Args:
        documents: documents to retrieve from. When None, the full English
            Wikipedia dump ("wikimedia/wikipedia", "20231101.en") is loaded —
            a very heavy default.
        num_result: retrieve top-k documents.
        k1: BM25 parameter. Values in [1.2, 2.0] are recommended.
        b: BM25 parameter. 0.75 is recommended.
        device: device on which BM25 scores are computed.
    """

    def __init__(
        self,
        documents: Optional[List[str]] = None,
        num_result: int = 10,
        k1: float = 1.5,
        b: float = 0.75,
        device: str = "cpu",
    ):
        if documents is None:
            dataset = load_dataset("wikimedia/wikipedia", "20231101.en")
            self.documents = [sample["text"] for sample in dataset["train"]]
        else:
            self.documents = documents
        if not self.documents:
            # An empty corpus would make avg_dl divide by zero below.
            raise ValueError("BM25Retriever requires a non-empty document collection")
        # BERT's WordPiece tokenizer is used purely for term segmentation.
        self.tokenizer = AutoTokenizer.from_pretrained(
            "bert-base-uncased", use_fast=True
        )
        self.num_result = num_result
        self.k1 = k1
        self.b = b
        self.device = device
        self.corpus_size = len(self.documents)
        self.vocab_size = self.tokenizer.vocab_size

        self.build_index()

    def build_index(self):
        """Precompute the sparse document-term BM25 tf matrix and per-token idfs."""
        doc_ids = []
        token_ids = []
        tfs = []
        lengths = []

        for i, document in enumerate(
            tqdm(self.documents, "Build index for BM25Retriever")
        ):
            input_ids = self.tokenizer.encode(document, add_special_tokens=False)
            token2cnt = Counter(input_ids)
            token_ids += token2cnt.keys()
            tfs += token2cnt.values()
            doc_ids += [i] * len(token2cnt)
            lengths.append(len(input_ids))

        # BM25 tf component: tf*(k1+1) / (tf + k1*(1 - b + b*dl/avg_dl)).
        avg_dl = sum(lengths) / self.corpus_size
        for i, doc_id in enumerate(doc_ids):
            tfs[i] = (
                tfs[i]
                * (self.k1 + 1)
                / (tfs[i] + self.k1 * (1 - self.b + self.b * lengths[doc_id] / avg_dl))
            )

        indices = torch.tensor([doc_ids, token_ids], device=self.device)
        values = torch.tensor(tfs, device=self.device)
        self.doc_tfs = torch.sparse_coo_tensor(
            indices, values, (self.corpus_size, self.vocab_size)
        )

        # Standard BM25 idf with +1 inside the log to keep it positive.
        idfs = [0] * self.vocab_size
        token2df = Counter(token_ids)
        for token_id, df in token2df.items():
            idfs[token_id] = math.log((self.corpus_size - df + 0.5) / (df + 0.5) + 1)
        self.idfs = idfs

    def __call__(self, query: str) -> List[str]:
        """Return the top documents for ``query``, best first.

        Returns an empty list when the query tokenizes to nothing; never asks
        topk for more results than the corpus holds.
        """
        input_ids = self.tokenizer.encode(query, add_special_tokens=False)
        token2cnt = Counter(input_ids)
        if not token2cnt:
            return []

        token_ids = []
        query_weights = []
        for token_id, query_tf in token2cnt.items():
            token_ids.append(token_id)
            query_weights.append(query_tf * self.idfs[token_id])

        # Sparse (vocab, 1) query vector; score = doc_tfs @ query_vec.
        indices = torch.tensor([token_ids, [0] * len(token_ids)], device=self.device)
        values = torch.tensor(query_weights, device=self.device)
        query_vec = torch.sparse_coo_tensor(indices, values, (self.vocab_size, 1))

        scores = torch.sparse.mm(self.doc_tfs, query_vec)
        scores = scores.to_dense().squeeze(-1)
        top_k = min(self.num_result, self.corpus_size)
        return [self.documents[i] for i in scores.topk(k=top_k).indices.tolist()]


@ray.remote  # pragma: no cover
class RAGEnvironment(EnvironmentInterface):
    """RAG environment that uses BM25 for document retrieval."""

    def __init__(self, cfg: RAGEnvConfig):
        self.cfg = cfg

        # Load dataset and extract the retrievable text column.
        dataset = load_dataset(cfg["dataset_name"], split=cfg["dataset_split"])
        documents = [sample[cfg["text_column"]] for sample in dataset]

        self.retriever = BM25Retriever(
            documents=documents,
            num_result=cfg["num_results"],
            k1=cfg["k1"],
            b=cfg["b"],
            device=cfg["device"],
        )

    def format_result(self, retrieved_docs: List[str]) -> str:
        """Wrap retrieved documents in <result><1>..</1>..</result> markup."""
        result = "<result>\n"
        for i, doc in enumerate(retrieved_docs):
            result += f"<{i + 1}>\n{doc}\n</{i + 1}>\n"
        result += "</result>\n"
        return result

    def step(
        self,
        message_log_batch: List[LLMMessageLogType],
        metadata_batch: List[Dict[str, Any]],
        return_extracted_answer: bool = False,
    ) -> EnvironmentReturn:
        """Process a batch of retrieval steps.

        The query is taken from a <retrieve>...</retrieve> span in the last
        message of each conversation; conversations without one get an error
        observation instead of results.
        """
        messages = [ml[-1]["content"] for ml in message_log_batch]

        results = []
        for message in messages:
            match = re.search(r"<retrieve>(.*)</retrieve>", message, re.DOTALL)
            if not match:
                results.append(
                    {"role": "environment", "content": "No retrieval query found!"}
                )
                continue
            query = match.group(1)
            retrieved_docs = self.retriever(query)
            result = self.format_result(retrieved_docs)
            results.append({"role": "environment", "content": result})

        batch_size = len(message_log_batch)
        # Retrieval itself carries no reward; every step terminates.
        rewards_tensor = torch.zeros(batch_size, dtype=torch.float32)
        terminated_tensor = torch.ones(batch_size, dtype=torch.bool)
        next_stop_strings = [["</retrieve>"]] * batch_size

        assert return_extracted_answer is False, (
            "return_extracted_answer is not supported in RAGEnvironment. Please set it to False."
        )
        extracted_answers = None

        return EnvironmentReturn(
            observations=results,
            metadata=metadata_batch,
            next_stop_strings=next_stop_strings,
            rewards=rewards_tensor,
            terminateds=terminated_tensor,
            answers=extracted_answers,
        )

    def shutdown(self):
        """Clean up resources (nothing to release for BM25)."""
        pass

    def global_post_process_and_metrics(
        self, batch: BatchedDataDict
    ) -> tuple[BatchedDataDict, dict]:
        """Compute metrics for the batch (none specific to RAG)."""
        return batch, {}
import contextlib
import io
import logging
from functools import partial
from typing import Any, Callable, List, Optional, TypedDict

import ray
import torch

from nemo_rl.distributed.batched_data_dict import BatchedDataDict
from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES
from nemo_rl.environments.interfaces import (
    EnvironmentInterface,
    EnvironmentReturn,
)
from nemo_rl.environments.metrics import (
    calculate_pass_rate_per_prompt,
)
from nemo_rl.environments.rewards import (
    bbox_giou_reward,
    combine_reward_functions,
    exact_answer_alphanumeric_reward,
    format_reward,
    math_expression_reward,
)
from nemo_rl.environments.utils import chunk_list_to_workers


class VLMEnvConfig(TypedDict):
    num_workers: int  # number of Ray verification workers
    stop_strings: Optional[list[str]]  # Default stop strings for this env
    reward_functions: List[dict[str, Any]]  # list of reward functions and their weights


# Registry mapping config names to reward implementations; extend here when a
# new reward function is added.
_REWARD_FUNCTION_REGISTRY: dict[str, Callable[..., tuple[float, Optional[bool]]]] = {
    "format": format_reward,
    "exact_alnum": exact_answer_alphanumeric_reward,
    "math_expr": math_expression_reward,
    "bbox_giou": bbox_giou_reward,
}


@contextlib.contextmanager
def _mute_output():
    """Temporarily swallow anything written to stdout/stderr inside the block."""
    devnull_out, devnull_err = io.StringIO(), io.StringIO()
    with (
        contextlib.redirect_stdout(devnull_out),
        contextlib.redirect_stderr(devnull_err),
    ):
        yield


@ray.remote
class VLMVerifyWorker:
    """Ray worker that grades VLM responses with a combined reward function."""

    def __init__(self, cfg: VLMEnvConfig) -> None:
        logging.getLogger("vlm_worker").setLevel(logging.CRITICAL)
        # Resolve the configured (name, weight, kwargs) entries into callables.
        reward_functions = []
        for reward_func_cfg in cfg["reward_functions"]:
            reward_func_name: str = reward_func_cfg["name"]
            reward_func_weight: float = reward_func_cfg["weight"]
            reward_func_kwargs: Optional[dict] = reward_func_cfg.get("kwargs", None)
            try:
                reward_func = _REWARD_FUNCTION_REGISTRY[reward_func_name]
            except KeyError:
                raise ValueError(
                    f"Invalid reward function: {reward_func_name}"
                ) from None

            # Bind any extra configuration (e.g. custom tags) to the function.
            if reward_func_kwargs is not None:
                reward_func = partial(reward_func, **reward_func_kwargs)

            reward_functions.append((reward_func, reward_func_weight))

        if len(reward_functions) == 0:
            raise ValueError("No reward functions provided")

        # Combine into a single weighted (ground_truth, response) -> score fn.
        self.verify_func = combine_reward_functions(reward_functions)

    def verify(
        self, pred_responses: list[str], ground_truths: list[str]
    ) -> list[float]:
        """Verify the correctness of the predicted responses against the ground truth.

        Args:
            pred_responses: list[str]. The predicted responses from the LLM.
            ground_truths: list[str]. The ground truth responses.

        Returns:
            list[float]. The rewards for each predicted response; failures
            score 0.0 and are reported on stdout.
        """
        results = []
        for response, ground_truth in zip(pred_responses, ground_truths):
            try:
                with _mute_output():
                    ret_score, _ = self.verify_func(ground_truth, response)
            except Exception as e:
                # Print outside _mute_output(): the original nested handler
                # printed inside it, silently discarding the diagnostic.
                ret_score = 0.0
                print(f"Error in verify_func: {e}")
            results.append(float(ret_score))
        return results


class VLMEnvironmentMetadata(TypedDict):
    ground_truth: str  # reference answer used for grading


@ray.remote(max_restarts=-1, max_task_retries=-1)
class VLMEnvironment(EnvironmentInterface):
    """Environment that grades VLM rollouts via a pool of verification workers."""

    def __init__(self, cfg: VLMEnvConfig):
        self.cfg = cfg
        self.num_workers = cfg["num_workers"]
        self.workers = [
            VLMVerifyWorker.options(  # type: ignore # (decorated with @ray.remote)
                runtime_env={"py_executable": PY_EXECUTABLES.SYSTEM}
            ).remote(cfg)
            for _ in range(self.num_workers)
        ]

    def shutdown(self) -> None:
        """Kill all verification workers."""
        for worker in self.workers:
            ray.kill(worker)

    def step(  # type: ignore[override]
        self,
        message_log_batch: list[list[dict[str, str]]],
        metadata: list[VLMEnvironmentMetadata],
    ) -> EnvironmentReturn:
        """Runs a step in the vlm environment.

        Args:
            message_log_batch: A batch of OpenAI-API-like message logs that
                represent interactions with the VLM.
            metadata: The grader uses the 'ground_truth' key to evaluate
                correctness.

        Returns:
            EnvironmentReturn with per-sample observations, the unchanged
            metadata, no stop strings, reward and done tensors, and no
            extracted answers.
        """
        # Concatenate every assistant turn of each conversation into one
        # response string for grading.
        assistant_response_batch = []
        for conversation in message_log_batch:
            assistant_responses = [
                interaction["content"]
                for interaction in conversation
                if interaction["role"] == "assistant"
            ]
            assistant_response_batch.append("".join(assistant_responses))

        ground_truths = [g["ground_truth"] for g in metadata]

        # Shard the batch across workers and grade the chunks in parallel.
        chunked_assistant_response_batch = chunk_list_to_workers(
            assistant_response_batch, self.num_workers
        )
        chunked_ground_truths = chunk_list_to_workers(ground_truths, self.num_workers)

        futures = [
            self.workers[i].verify.remote(chunk, ground_truth_chunk)
            for i, (chunk, ground_truth_chunk) in enumerate(
                zip(chunked_assistant_response_batch, chunked_ground_truths)
            )
        ]

        results = ray.get(futures)

        # Flatten the per-worker result lists back into batch order.
        results = [item for sublist in results for item in sublist]
        # NOTE(review): this truthiness check labels any nonzero partial
        # reward (e.g. format-only credit) as "correct" — confirm intended.
        observations = [
            {
                "role": "environment",
                "content": "Environment: correct"
                if result
                else "Environment: incorrect",
            }
            for result in results
        ]

        rewards = torch.tensor(results).cpu()
        done = torch.ones_like(rewards).cpu()

        next_stop_strings = [None] * len(message_log_batch)

        return EnvironmentReturn(
            observations=observations,
            metadata=metadata,
            next_stop_strings=next_stop_strings,
            rewards=rewards,
            terminateds=done,
            answers=None,
        )

    def global_post_process_and_metrics(
        self, batch: BatchedDataDict[Any]
    ) -> tuple[BatchedDataDict[Any], dict[str, float | int]]:
        """Computes metrics for this environment given a global rollout batch.

        Every rank will run this function, so you're free to use distributed
        calculations if you'd prefer for heavy metrics.
        """
        # Zero out rewards for sequences that did not terminate properly.
        batch["rewards"] = batch["rewards"] * batch["is_end"]
        if (batch["rewards"] == 1).float().sum() > 0:
            correct_solution_generation_lengths = (
                (batch["generation_lengths"] - batch["prompt_lengths"])[
                    batch["rewards"] == 1
                ]
                .float()
                .mean()
                .item()
            )
        else:
            correct_solution_generation_lengths = 0

        metrics = {
            "accuracy": batch["rewards"].mean().item(),
            "pass@samples_per_prompt": calculate_pass_rate_per_prompt(
                batch["text"], batch["rewards"]
            ),
            "fraction_of_samples_properly_ended": batch["is_end"].float().mean().item(),
            "num_problems_in_batch": batch["is_end"].shape[0],
            "generation_lengths": batch["generation_lengths"].float().mean().item(),
            "prompt_lengths": batch["prompt_lengths"].float().mean().item(),
            "correct_solution_generation_lengths": correct_solution_generation_lengths,
        }

        return batch, metrics
import eval_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset from nemo_rl.data.llm_message_utils import get_keys_from_message_log from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.virtual_cluster import ClusterConfig, RayVirtualCluster @@ -42,16 +45,21 @@ class EvalConfig(TypedDict): metric: str num_tests_per_prompt: int seed: int - pass_k_value: int + k_value: int save_path: str | None +# TODO: this should updated, but is left to avoid breaking changes +class _PassThroughMathConfig(TypedDict): + math: MathEnvConfig + + class MasterConfig(TypedDict): eval: EvalConfig generation: GenerationConfig # Fixed: was 'generate' tokenizer: TokenizerConfig # Added missing tokenizer key - data: MathDataConfig - env: MathEnvConfig + data: EvalDataConfigType + env: _PassThroughMathConfig cluster: ClusterConfig @@ -90,24 +98,21 @@ def setup( # Check settings metric = eval_config["metric"] - pass_k_value = eval_config["pass_k_value"] + k_value = eval_config["k_value"] num_tests_per_prompt = eval_config["num_tests_per_prompt"] temperature = generation_config["temperature"] top_k = generation_config["top_k"] - # TODO @yukih: support cons@k # Validate metrics - assert metric in ["pass@k"], f"Invalid metric: {metric}" + assert metric in ["pass@k", "cons@k"], f"Invalid metric: {metric}" if num_tests_per_prompt > 1: assert temperature > 0 and top_k != 1, ( "temperature > 0 and top_k != 1 are required for multiple samples" ) - assert pass_k_value >= 1, ( - "pass_k_value must be greater than or equal to 1 for pass@k metric" - ) - assert num_tests_per_prompt >= pass_k_value, ( - "num_tests_per_prompt must be greater than or equal to pass_k_value for pass@k metric" + assert k_value >= 1, "k_value must be greater than or equal to 1" + assert num_tests_per_prompt >= k_value, ( + "num_tests_per_prompt must be greater than or equal to k_value " ) # ========================== @@ -195,6 +200,80 @@ def eval_single_chunk(n: int, c: int, 
k: int) -> float: return pass_k_score +def eval_cons_k( + rewards: torch.Tensor, + num_tests_per_prompt: int, + k: int, + extracted_answers: list[str | None], +) -> float: + """Evaluate cons@k score using an unbiased estimator. + + Args: + rewards: Tensor of shape (batch_size * num_tests_per_prompt) + num_tests_per_prompt: int + k: int + extracted_answers: list[str| None] + + Returns: + cons_k_score: float + """ + + def majority_vote(answers: list[str | None]) -> str | None: + """Find the most common answer in the list of answers.""" + if not answers: + return None + # To fix@rayentian: How to deal with the case that there are multiple most common answers? Now we just return the first one. + return Counter(answers).most_common(1)[0][0] + + def eval_single_cons_k( + chunk_rewards: torch.Tensor, chunk_answers: list[str | None], n: int, k: int + ) -> float: + if chunk_answers is None or n == 0 or k > n: + return 0.0 + + total_subsets = 0 + correct_subsets = 0 + # For each subset of k answers, we vote for the most common answer. + # If the most common answer is the same as the gold answer, we consider the subset as correct. + for subset_indices in combinations(range(n), k): + subset_answers = [chunk_answers[i] for i in subset_indices] + majority_answer = majority_vote(subset_answers) + reward_idx = chunk_answers.index(majority_answer) + reward = chunk_rewards[reward_idx].item() + total_subsets += 1 + if reward == 1.0: + correct_subsets += 1 + + return correct_subsets / total_subsets + + assert len(extracted_answers) == len(rewards), ( + "The number of extracted answers must be the same as the number of rewards" + ) + # Split the rewards and extracted answers into groups of num_tests_per_prompt. 
+ group_rewards = rewards.split(num_tests_per_prompt) + group_extracted_answers = [ + extracted_answers[i : i + num_tests_per_prompt] + for i in range(0, len(extracted_answers), num_tests_per_prompt) + ] + assert len(group_rewards) == len(group_extracted_answers), ( + "The number of rewards and extracted answers must be the same" + ) + num_groups = len(group_rewards) + cons_k_score = 0.0 + # For each group of num_tests_per_prompt rewards and extracted answers, we evaluate the cons@k score. + for i in range(num_groups): + chunk_rewards = group_rewards[i] + chunk_answers = group_extracted_answers[i] + assert len(chunk_rewards) == len(chunk_answers), ( + "The number of rewards and extracted answers must be the same" + ) + cons_k_score += eval_single_cons_k( + chunk_rewards, chunk_answers, len(chunk_answers), k + ) + + return cons_k_score + + def run_env_eval(vllm_generation, dataloader, env, master_config): """Main entry point for running evaluation using environment. @@ -230,7 +309,7 @@ async def _run_env_eval_impl( eval_config = master_config["eval"] metric = eval_config["metric"] num_tests_per_prompt = eval_config["num_tests_per_prompt"] - pass_k_value = eval_config["pass_k_value"] + k_value = eval_config["k_value"] # List to collect evaluation data for parquet file evaluation_data = [] @@ -267,7 +346,8 @@ async def _run_env_eval_impl( get_keys_from_message_log(batch["message_log"][i], ["role", "content"]) for i in range(len(batch["message_log"])) ] - env_return = ray.get(env.step.remote(to_env, batch["extra_env_info"])) + + env_return = ray.get(env.step.remote(to_env, batch["extra_env_info"], True)) rewards = env_return.rewards # Collect data for JSON file @@ -293,7 +373,12 @@ async def _run_env_eval_impl( # update stats if metric == "pass@k": - score += eval_pass_k(rewards, num_tests_per_prompt, pass_k_value) + score += eval_pass_k(rewards, num_tests_per_prompt, k_value) + elif metric == "cons@k": + extracted_answers = env_return.answers + score += eval_cons_k( + 
rewards, num_tests_per_prompt, k_value, extracted_answers + ) else: raise ValueError(f"Invalid metric: {metric}") @@ -313,7 +398,7 @@ async def _run_env_eval_impl( score, len(dataloader.dataset), metric, - pass_k_value, + k_value, num_tests_per_prompt, ) @@ -348,7 +433,7 @@ def _save_evaluation_data_to_json(evaluation_data, master_config, save_path): "model_name": master_config["generation"]["model_name"], "dataset_name": master_config["data"]["dataset_name"], "metric": master_config["eval"]["metric"], - "pass_k_value": master_config["eval"]["pass_k_value"], + "k_value": master_config["eval"]["k_value"], "num_tests_per_prompt": master_config["eval"]["num_tests_per_prompt"], "temperature": master_config["generation"]["temperature"], "top_p": master_config["generation"]["top_p"], @@ -399,13 +484,14 @@ def _print_results( score, dataset_size, metric, - pass_k_value, + k_value, num_tests_per_prompt, ): """Print evaluation results.""" dataset_name = os.path.basename(master_config["data"]["dataset_name"]) model_name = os.path.basename(generation_config["model_name"]) max_new_tokens = generation_config["vllm_cfg"]["max_model_len"] + seed = master_config["eval"]["seed"] temperature = generation_config["temperature"] top_p = generation_config["top_p"] top_k = generation_config["top_k"] @@ -413,7 +499,7 @@ def _print_results( print("\n" + "=" * 60) print(f"{model_name=} {dataset_name=}") - print(f"{max_new_tokens=} {temperature=} {top_p=} {top_k=}\n") - print(f"{metric=} {pass_k_value=} {num_tests_per_prompt=}\n") + print(f"{max_new_tokens=} {temperature=} {top_p=} {top_k=} {seed=}\n") + print(f"metric={metric[:-1]}{k_value} {num_tests_per_prompt=}\n") print(f"score={average_score:.4f} ({score}/{dataset_size})") print("=" * 60 + "\n") diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index 62e7ee1122..b8b378542c 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -17,11 +17,16 @@ import asyncio import copy -from 
typing import Any +import json +import statistics +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Optional import ray import torch from transformers import PreTrainedTokenizerBase +from wandb import Histogram, Table from nemo_rl.data.interfaces import ( DatumSpec, @@ -38,10 +43,12 @@ EnvironmentReturn, ) from nemo_rl.models.generation.interfaces import ( + GenerationConfig, GenerationDatumSpec, GenerationInterface, GenerationOutputSpec, ) +from nemo_rl.utils.timer import Timer TokenizerType = PreTrainedTokenizerBase @@ -158,10 +165,9 @@ async def generate_responses_async( "Generation returned no outputs for a non-empty batch." ) - pad_token_id = policy_generation.cfg.get("pad_token_id", tokenizer.pad_token_id) generation_outputs = BatchedDataDict.from_batches( ordered_batched_data_dicts, - pad_value_dict={"output_ids": pad_token_id, "logprobs": 0.0}, + pad_value_dict={"output_ids": tokenizer.pad_token_id, "logprobs": 0.0}, ) # Extract everything we need from the generation outputs @@ -202,6 +208,16 @@ async def generate_responses_async( "mean_generation_length": generation_lengths.float().mean().item(), "total_generated_tokens": generation_lengths.sum().item(), } + # Attach worker metadata if present (async vLLM path) + if "gen_leader_worker_idx" in generation_outputs: + # generation_outputs carries this as a 1-length list per row; convert to int + v = generation_outputs["gen_leader_worker_idx"][0] + try: + gen_metrics["gen_leader_worker_idx"] = ( + int(v[0]) if isinstance(v, list) else int(v) + ) + except Exception as e: + print(f"Error occurred while extracting gen_leader_worker_idx: {e}") return batch, generated_ids, gen_metrics @@ -264,15 +280,23 @@ def calculate_rewards( all_next_stop_strings = [] all_metadata = [] # Store extracted metadata all_indices_order = [] + all_answers = [] for future, result in zip(futures, results): indices = future_to_indices[future] # Environment step returns: EnvironmentReturn - 
env_observations, metadata, next_stop_strings, task_rewards, terminateds = ( - result - ) + ( + env_observations, + metadata, + next_stop_strings, + task_rewards, + terminateds, + answers, + ) = result if next_stop_strings is None: next_stop_strings = [None] * len(task_rewards) + if answers is None: + answers = [None] * len(task_rewards) # Store results with their original indices for i, idx in enumerate(indices): @@ -282,6 +306,7 @@ def calculate_rewards( all_terminateds.append(terminateds[i]) all_next_stop_strings.append(next_stop_strings[i]) all_metadata.append(metadata[i]) + all_answers.append(answers[i]) # Sort results by original index to maintain order sorted_indices = sorted( @@ -292,6 +317,7 @@ def calculate_rewards( terminateds = torch.tensor([all_terminateds[i] for i in sorted_indices]) next_stop_strings = [all_next_stop_strings[i] for i in sorted_indices] metadata = [all_metadata[i] for i in sorted_indices] # Sort metadata + answers = [all_answers[i] for i in sorted_indices] return EnvironmentReturn( observations=env_observations, @@ -299,6 +325,7 @@ def calculate_rewards( next_stop_strings=next_stop_strings, rewards=rewards, terminateds=terminateds, + answers=answers, ) @@ -369,6 +396,7 @@ def run_multi_turn_rollout( # Extract input_ids and lengths from the flat messages active_input_ids = active_flat_messages["token_ids"] + # Prepare generation input data generation_input_data = BatchedDataDict[GenerationDatumSpec]( { "input_ids": active_input_ids, @@ -376,6 +404,17 @@ def run_multi_turn_rollout( "stop_strings": active_stop_strings, } ) + # add the multimodal data to the generation input data + multimodal_data = active_flat_messages.get_multimodal_dict(as_tensors=False) + generation_input_data.update(multimodal_data) + + # keep message log for generation + if "vllm_content" in active_batch: + generation_input_data["vllm_content"] = active_batch["vllm_content"] + if "vllm_images" in active_batch: + generation_input_data["vllm_images"] = 
active_batch["vllm_images"] + if "vllm_videos" in active_batch: + generation_input_data["vllm_videos"] = active_batch["vllm_videos"] # generate_responses updates active_batch["message_log"] in-place active_batch, generated_ids, gen_metrics = generate_responses( @@ -410,6 +449,8 @@ def run_multi_turn_rollout( tokenized_obs = tokenizer( env_obs_content, return_tensors="pt", add_special_tokens=False ).input_ids[0] + # tokenizer returns torch.float32 when env_obs_content is empty + tokenized_obs = tokenized_obs.to(dtype=torch.int64) # check if new message overflows max_seq_len if ( @@ -472,6 +513,7 @@ def run_multi_turn_rollout( # Add total rewards to the final batch current_batch["total_reward"] = total_rewards + current_batch["truncated"] = sample_truncated # Calculate aggregate metrics rollout_metrics = { @@ -489,6 +531,9 @@ def run_multi_turn_rollout( "mean_gen_tokens_per_sample": float( sample_assistant_token_counts.float().mean().item() ), + "max_gen_tokens_per_sample": float( + sample_assistant_token_counts.float().max().item() + ), "mean_env_tokens_per_sample": float( sample_env_token_counts.float().mean().item() ), @@ -609,6 +654,8 @@ async def run_sample_multi_turn_rollout( # Track per-turn metrics turn_gen_tokens = [] + # Track per-turn per-worker token accounting if available + per_worker_token_counts = {} # worker_idx -> token_count for turn in range(max_rollout_turns): if terminated or truncated: @@ -638,6 +685,12 @@ async def run_sample_multi_turn_rollout( assistant_token_count += gen_token_count token_count += gen_token_count turn_gen_tokens.append(gen_token_count) + # Per-worker load accounting + if "gen_leader_worker_idx" in gen_metrics: + worker_idx = int(gen_metrics["gen_leader_worker_idx"]) + per_worker_token_counts[worker_idx] = ( + per_worker_token_counts.get(worker_idx, 0) + gen_token_count + ) except Exception as e: print(f"Error generating response for sample {sample_idx}: {e}") @@ -717,6 +770,8 @@ async def run_sample_multi_turn_rollout( 
"max_turns_reached": max_turns_reached, "total_reward": total_reward, "turn_gen_tokens": turn_gen_tokens, + # Pass-through per-worker per-turn accounting for aggregation at batch level + "per_worker_token_counts": per_worker_token_counts, } return final_sample_state, sample_metrics @@ -817,6 +872,10 @@ async def run_single_sample_with_error_handling(i, sample_state): "idx": [ state.get("idx", i) for i, state in enumerate(final_sample_states) ], + "truncated": torch.tensor( + [metrics["truncated"] for metrics in all_sample_metrics], + dtype=torch.bool, + ), } ) @@ -849,6 +908,9 @@ async def run_single_sample_with_error_handling(i, sample_state): m["assistant_tokens"] for m in all_sample_metrics ) / batch_size, + "max_gen_tokens_per_sample": max( + m["assistant_tokens"] for m in all_sample_metrics + ), "mean_env_tokens_per_sample": sum( m["env_tokens"] for m in all_sample_metrics ) @@ -860,6 +922,224 @@ async def run_single_sample_with_error_handling(i, sample_state): "min_total_reward": min(m["total_reward"] for m in all_sample_metrics), } + # Calculate per-worker token counts + if "per_worker_token_counts" in all_sample_metrics[0]: + per_worker_token_counts = {} + for m in all_sample_metrics: + for k, v in m["per_worker_token_counts"].items(): + per_worker_token_counts[k] = per_worker_token_counts.get(k, 0) + v + rollout_metrics["per_worker_token_counts"] = per_worker_token_counts + return final_batch, rollout_metrics return asyncio.run(_async_rollout_implementation()) + + +@dataclass +class AsyncPenguinRolloutResult: + input_ids: torch.Tensor + final_batch: BatchedDataDict[DatumSpec] + rollout_metrics: dict[str, Any] + + +def _calculate_single_metric( + values: list[float], batch_size: int, key_name: str +) -> dict: + return { + f"{key_name}/mean": sum(values) / batch_size, + f"{key_name}/max": max(values), + f"{key_name}/min": min(values), + f"{key_name}/median": statistics.median(values), + f"{key_name}/stddev": statistics.stdev(values), + 
f"{key_name}/histogram": Histogram(values), + } + + +def run_async_penguin_rollout( + policy_generation: GenerationInterface, + input_batch: BatchedDataDict[DatumSpec], + tokenizer: TokenizerType, + task_to_env: dict[str, EnvironmentInterface], + generation_config: GenerationConfig, + max_seq_len: Optional[int] = None, + max_rollout_turns: Optional[int] = None, + greedy: bool = False, +) -> AsyncPenguinRolloutResult: + """Run multi-turn rollouts with Penguin. Please refer to the `run_async_multi_turn_rollout` docs for more information on the parameters.""" + # We leverage the same `extra_env_info` key as `run_async_multi_turn_rollout`. + penguin_rows = input_batch["extra_env_info"] + + # Handle generation parameters up front so we don't hide anything inside here to avoid being unintuitive to the user. + # Penguin policy is "What you see is what you get". + assert not greedy, "`greedy` is not supported in Penguin path!" + assert max_rollout_turns is None, ( + "`max_rollout_turns` is not supported in Penguin path!" + ) + assert max_seq_len is None, "`max_seq_len` is not supported in Penguin path!" + # We don't use these stop criteria + assert not generation_config["stop_strings"], ( + "Stop strings is not supported in the generation config in Penguin path!" + ) + assert not generation_config["stop_token_ids"], ( + "Stop strings is not supported in the generation config in Penguin path!" + ) + # Top k is not OpenAI compatible, so Penguin does not guarantee support over it. + assert not generation_config["top_k"], ( + "Top k is not supported in the generation config in Penguin path!" + ) + + timer = Timer() + timer_prefix = "timing/rollout" + timer.start(f"{timer_prefix}/total") + + for row in penguin_rows: + # We may need better handling here. The max tokens set here would be the max new generated tokens, not the total max tokens. + # Currently, we just rely on the underlying vLLM engine to do the truncation for us using the max model seq len set in the config. 
+ # row["max_tokens"] = max_seq_len + + responses_create_params = row["responses_create_params"] + responses_create_params["temperature"] = generation_config["temperature"] + responses_create_params["top_p"] = generation_config["top_p"] + + # Max new tokens, just like max_seq_len above is ignored and we rely on the underlying vLLM engine for truncation. + # generation_config["max_new_tokens"] + + with timer.time(f"{timer_prefix}/run_rollouts"): + penguin_environment = task_to_env["penguin"] + results, rollout_loop_timing_metrics = ray.get( + penguin_environment.run_rollouts.remote( + penguin_rows, tokenizer, timer_prefix + ) + ) + + # Prepare for the rollout metrics calculation below. Not strictly necessary here, but good to have parity with `run_async_multi_turn_rollout` + with timer.time(f"{timer_prefix}/prepare_for_metrics_calculation"): + batch_size = len(penguin_rows) + max_total_tokens_per_sample = policy_generation.cfg["vllm_cfg"]["max_model_len"] + all_sample_metrics = [ + { + "total_reward": r["full_result"]["reward"], + "assistant_tokens": sum( + len(m["token_ids"]) + for m in r["message_log"] + if m["role"] == "assistant" + ), + "total_tokens": sum(len(m["token_ids"]) for m in r["message_log"]), + "turn_count": sum(1 for m in r["message_log"] if m["role"] == "user"), + "hit_max_tokens": sum(len(m["token_ids"]) for m in r["message_log"]) + == max_total_tokens_per_sample, + } + for r in results + ] + + # Aggregate metrics across all samples + with timer.time(f"{timer_prefix}/aggregate_metrics"): + rollout_metrics = { + **rollout_loop_timing_metrics, + **_calculate_single_metric( + [m["turn_count"] for m in all_sample_metrics], + batch_size, + "turns_per_sample", + ), + **_calculate_single_metric( + [m["total_tokens"] for m in all_sample_metrics], + batch_size, + "total_tokens_per_sample", + ), + **_calculate_single_metric( + [m["assistant_tokens"] for m in all_sample_metrics], + batch_size, + "gen_tokens_per_sample", + ), + **_calculate_single_metric( + 
[m["total_reward"] for m in all_sample_metrics], + batch_size, + "total_reward", + ), + "natural_termination_rate": sum( + not m["hit_max_tokens"] for m in all_sample_metrics + ) + / batch_size, + "truncation_rate": sum(m["hit_max_tokens"] for m in all_sample_metrics) + / batch_size, + # TODO enable this metric. We don't have a clear handle on which tokens are user or tool role. + # We would probably need to re-tokenize the messages post-hoc to kind of figure this out. + # "mean_env_tokens_per_sample": sum( + # m["env_tokens"] for m in all_sample_metrics + # ) + # / batch_size, + } + + # Per-agent misc metrics + with timer.time(f"{timer_prefix}/per_agent_misc_metrics"): + agent_to_results: dict[str, list[dict]] = defaultdict(list) + for penguin_row, result in zip(penguin_rows, results): + agent_name = penguin_row["agent_ref"]["name"] + agent_to_results[agent_name].append(result["full_result"]) + + per_agent_metrics = {} + for agent_name, agent_results in agent_to_results.items(): + keys = agent_results[0].keys() + for key in keys: + values = [ + float(r[key]) + for r in agent_results + if isinstance(r.get(key), (bool, int, float)) + ] + if values: + per_agent_metrics.update( + _calculate_single_metric( + values, len(agent_results), f"{agent_name}/{key}" + ) + ) + + # Log the full result + to_log = [[json.dumps(r, separators=((",", ":")))] for r in agent_results] + per_agent_metrics[f"{agent_name}/full_result"] = Table( + data=to_log, columns=["Full result"] + ) + + rollout_metrics.update(per_agent_metrics) + + # Necessary for downstream nemo rl logging/printing. 
+ rollout_metrics["mean_gen_tokens_per_sample"] = rollout_metrics[ + "gen_tokens_per_sample/mean" + ] + timer.stop(f"{timer_prefix}/total") + rollout_metrics.update(timer.get_timing_metrics("sum")) + + # Convert LLMMessageLogType to FlatMessagesType for generation + input_batch_for_input_ids = BatchedDataDict[DatumSpec]( + { + "message_log": [r["input_message_log"] for r in results], + } + ) + batched_flat, _ = batched_message_log_to_flat_message( + input_batch_for_input_ids["message_log"], + pad_value_dict={"token_ids": tokenizer.pad_token_id}, + ) + input_ids = batched_flat["token_ids"] + + final_batch = BatchedDataDict[DatumSpec]( + { + "message_log": [r["message_log"] for r in results], + # length is used downstream for mean_prompt_length + "length": torch.tensor( + [len(r["input_message_log"][0]["token_ids"]) for r in results] + ), + "loss_multiplier": input_batch["loss_multiplier"], + # Unnecessary parts of the DatumSpec unused by the GRPO algorithm + # extra_env_info: dict[str, Any] + # idx: int + # task_name: NotRequired[str] + # stop_strings: NotRequired[list[str]] # Optional stop strings for generation + # Extra information not in the DatumSpec used by the GRPO algorithm + "total_reward": torch.tensor([r["full_result"]["reward"] for r in results]), + } + ) + + return AsyncPenguinRolloutResult( + input_ids=input_ids, + final_batch=final_batch, + rollout_metrics=rollout_metrics, + ) diff --git a/nemo_rl/metrics/__init__.py b/nemo_rl/metrics/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/nemo_rl/models/dtensor/parallelize.py b/nemo_rl/models/dtensor/parallelize.py index e2af748d71..997c5dca2a 100644 --- a/nemo_rl/models/dtensor/parallelize.py +++ b/nemo_rl/models/dtensor/parallelize.py @@ -31,8 +31,6 @@ from torch.distributed.tensor.parallel import ( ColwiseParallel, ParallelStyle, - PrepareModuleInput, - PrepareModuleOutput, RowwiseParallel, SequenceParallel, parallelize_module, @@ -44,10 +42,30 @@ 
Gemma3ForConditionalGeneration, ) from transformers.models.llama.modeling_llama import LlamaForCausalLM +from transformers.models.llama4.modeling_llama4 import Llama4ForConditionalGeneration +from transformers.models.llava.modeling_llava import LlavaForConditionalGeneration +from transformers.models.llava_next.modeling_llava_next import ( + LlavaNextForConditionalGeneration, +) +from transformers.models.llava_next_video.modeling_llava_next_video import ( + LlavaNextVideoForConditionalGeneration, +) +from transformers.models.llava_onevision.modeling_llava_onevision import ( + LlavaOnevisionForConditionalGeneration, +) +from transformers.models.mistral3.modeling_mistral3 import ( + Mistral3ForConditionalGeneration, +) from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM +from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import ( + Qwen2_5_VLForConditionalGeneration, +) +from transformers.models.qwen2_vl.modeling_qwen2_vl import ( + Qwen2VLForConditionalGeneration, +) from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM +from transformers.models.smolvlm.modeling_smolvlm import SmolVLMForConditionalGeneration -from nemo_rl.distributed.model_utils import dtensor_from_parallel_logits_to_logprobs from nemo_rl.models.policy.utils import import_class_from_path @@ -93,18 +111,14 @@ def _parallelize_gemma3( model: Union[Gemma3ForCausalLM, Gemma3ForConditionalGeneration], sequence_parallel: bool = False, ) -> dict[str, ParallelStyle]: - """Parallelizes a Gemma3ForCausalLM model across data parallel dimensions. - - Tensor parallelism is not supported for Gemma3 models because of tied word embeddings. 
- """ + """Parallelizes a Gemma3ForCausalLM model across data and tensor parallel dimensions.""" if isinstance(model, Gemma3ForConditionalGeneration): model_prefix = "model.language_model" else: model_prefix = "model" - # For gemma3 models, we don't include the model.embed_tokens and lm_head in the - # parallelization plans because they have tied weights. base_model_tp_plan: dict[str, ParallelStyle] = { + f"{model_prefix}.embed_tokens": RowwiseParallel(input_layouts=Replicate()), f"{model_prefix}.layers.*.self_attn.q_proj": ColwiseParallel(), f"{model_prefix}.layers.*.self_attn.k_proj": ColwiseParallel(), f"{model_prefix}.layers.*.self_attn.v_proj": ColwiseParallel(), @@ -112,13 +126,12 @@ def _parallelize_gemma3( f"{model_prefix}.layers.*.mlp.up_proj": ColwiseParallel(), f"{model_prefix}.layers.*.mlp.gate_proj": ColwiseParallel(), f"{model_prefix}.layers.*.mlp.down_proj": RowwiseParallel(), + "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), } base_model_sp_plan = { - f"{model_prefix}.embed_tokens": PrepareModuleOutput( - output_layouts=Replicate(), - desired_output_layouts=Shard(1), - use_local_output=False, + f"{model_prefix}.embed_tokens": RowwiseParallel( + input_layouts=Replicate(), output_layouts=Shard(1) ), f"{model_prefix}.rotary_emb": RotaryEmbedParallel(use_local_output=True), f"{model_prefix}.rotary_emb_local": RotaryEmbedParallel(use_local_output=True), @@ -133,10 +146,8 @@ def _parallelize_gemma3( ), f"{model_prefix}.layers.*.post_feedforward_layernorm": SequenceParallel(), f"{model_prefix}.norm": SequenceParallel(), - "lm_head": PrepareModuleInput( - input_layouts=(Shard(1),), - desired_input_layouts=(Replicate(),), - use_local_output=True, + "lm_head": ColwiseParallel( + input_layouts=Shard(1), output_layouts=Shard(-1), use_local_output=False ), } @@ -312,12 +323,45 @@ def get_hf_tp_plan(model: PreTrainedModel): AssertionError: If no TP plan is found """ model_cls = type(model) - if model_cls == 
Gemma3ForConditionalGeneration: + + # Handle VL models structure + if model_cls in [ + Qwen2VLForConditionalGeneration, + Qwen2_5_VLForConditionalGeneration, + ]: + inner_model = model.model.language_model + model_prefix = "model.language_model" + config = model.model.language_model.config + + elif model_cls == Gemma3ForConditionalGeneration: inner_model = model.language_model model_prefix = "language_model" + config = model.config.text_config + + elif model_cls == Llama4ForConditionalGeneration: + inner_model = model.language_model.model + model_prefix = "language_model.model" + config = model.language_model.model.config + + elif model_cls in [ + LlavaForConditionalGeneration, + LlavaNextForConditionalGeneration, + LlavaNextVideoForConditionalGeneration, + LlavaOnevisionForConditionalGeneration, + ]: + inner_model = model.model.language_model + model_prefix = "model.language_model" + config = model.model.language_model.config + + elif model_cls == Mistral3ForConditionalGeneration: + inner_model = model.model.language_model + model_prefix = "model.language_model" + config = model.model.language_model.config + else: inner_model = model.model model_prefix = "model" + config = model.config hf_tp_plan = {} @@ -342,19 +386,12 @@ def get_hf_tp_plan(model: PreTrainedModel): ) # hf tp plan not contain embed_tokens, we add it and set to rowwise_rep - if ( - f"{model_prefix}.embed_tokens" not in hf_tp_plan - and not model.config.tie_word_embeddings - ): + if f"{model_prefix}.embed_tokens" not in hf_tp_plan: hf_tp_plan[f"{model_prefix}.embed_tokens"] = "rowwise_rep" for k, v in hf_tp_plan.items(): # speed up the tp plan for lm_head - if ( - k == "lm_head" - and v == "colwise_rep" - and not model.config.tie_word_embeddings - ): + if (k == "lm_head" or k == "language_model.lm_head") and v == "colwise_rep": hf_tp_plan[k] = ColwiseParallel( output_layouts=Shard(-1), use_local_output=False ) @@ -364,9 +401,80 @@ def get_hf_tp_plan(model: PreTrainedModel): return hf_tp_plan +def 
_parallelize_nm5_h( + model, + dp_mesh: DeviceMesh, + tp_mesh: DeviceMesh, + param_dtype: torch.dtype, + sequence_parallel: bool = False, + activation_checkpointing: bool = False, + cpu_offload: bool = False, + custom_parallel_plan: Optional[Union[dict, str]] = None, +) -> torch.distributed.fsdp.FSDPModule: + """Parallelize a NemotronHForCausalLM model across data and tensor parallel dimensions.""" + assert not sequence_parallel, ( + "Sequence parallelism is not supported for NemotronHForCausalLM" + ) + assert custom_parallel_plan is None, ( + "Custom parallel plan is not supported for NemotronHForCausalLM" + ) + + model_tp_plan: dict[str, ParallelStyle] = { + "lm_head": ColwiseParallel(output_layouts=Shard(-1), use_local_output=False), + } + + mlp_tp_plan: dict[str, ParallelStyle] = { + "mixer.up_proj": ColwiseParallel(), + "mixer.down_proj": RowwiseParallel(), + } + + layers: torch.nn.ModuleList = model.backbone.layers + parallelize_module(model, tp_mesh, model_tp_plan) + + for layer in model.backbone.layers: + if layer.block_type == "mlp": + parallelize_module(layer, tp_mesh, mlp_tp_plan) + + if activation_checkpointing: + for i in range(len(layers)): + if layers[i].block_type == "mlp": + layers[i] = checkpoint_wrapper(layers[i]) + + if layers[i].block_type == "mamba": + layers[i] = checkpoint_wrapper(layers[i]) + + mp_policy = MixedPrecisionPolicy( + param_dtype=param_dtype, + reduce_dtype=torch.float32, + output_dtype=torch.float32, + ) + + offload_policy = ( + CPUOffloadPolicy(pin_memory=False) + if cpu_offload + else torch.distributed.fsdp.OffloadPolicy + ) + + for layer in layers: + fully_shard( + layer, mesh=dp_mesh, mp_policy=mp_policy, offload_policy=offload_policy + ) + + # do not reshard after forward for root model + # because its parameters will be used in backward immediately + return fully_shard( + model, + mesh=dp_mesh, + mp_policy=mp_policy, + offload_policy=offload_policy, + reshard_after_forward=False, + ) + + def _parallelize_model( model: 
Union[ Qwen2ForCausalLM, + Qwen3ForCausalLM, LlamaForCausalLM, Gemma3ForCausalLM, Gemma3ForConditionalGeneration, @@ -401,11 +509,93 @@ def _parallelize_model( ValueError: If the model type is not supported for parallelization. """ model_cls = type(model) + + # Handle different model structures if model_cls == Gemma3ForConditionalGeneration: + # layers: torch.nn.ModuleList = model.language_model.layers # type: ignore + layers: list = [] + for layer in model.language_model.layers: + layers.append(layer) + # siglip encoder also has the same structure as clip encoder (being the same model after all) + for layer in model.vision_tower.vision_model.encoder.layers: + layers.append(layer) layers: torch.nn.ModuleList = model.language_model.layers # type: ignore num_attention_heads = model.config.text_config.num_attention_heads num_key_value_heads = model.config.text_config.num_key_value_heads + + elif model_cls.__name__ == "NemotronHForCausalLM": + # need to do something special for nm5, since it's harder to shard the mamba layers + # nm5 is not importable, so we check the __name__ attribute + return _parallelize_nm5_h( + model, + dp_mesh, + tp_mesh, + param_dtype, + sequence_parallel, + activation_checkpointing, + cpu_offload, + custom_parallel_plan, + ) + + elif model_cls in [ + Qwen2_5_VLForConditionalGeneration, + Qwen2VLForConditionalGeneration, + ]: + # VL models have the language model at model.language_model + layers: list = [] + # append language model layers + for layer in model.language_model.layers: + layers.append(layer) + # append visual model layers + for layer in model.visual.blocks: + layers.append(layer) + + num_attention_heads = model.language_model.config.num_attention_heads + num_key_value_heads = model.language_model.config.num_key_value_heads + + elif model_cls == SmolVLMForConditionalGeneration: + layers: list = [] + for layer in model.model.text_model.layers: + layers.append(layer) + for layer in model.model.vision_model.encoder.layers: + 
layers.append(layer) + num_attention_heads = model.model.text_model.config.num_attention_heads + num_key_value_heads = model.model.text_model.config.num_key_value_heads + + elif model_cls in [ + LlavaForConditionalGeneration, + LlavaNextForConditionalGeneration, + LlavaNextVideoForConditionalGeneration, + LlavaOnevisionForConditionalGeneration, + ]: + layers: list = [] + for layer in model.model.language_model.layers: + layers.append(layer) + for layer in model.vision_tower.vision_model.encoder.layers: + layers.append(layer) + num_attention_heads = model.language_model.config.num_attention_heads + num_key_value_heads = model.language_model.config.num_key_value_heads + + elif model_cls == Mistral3ForConditionalGeneration: + layers: list = [] + for layer in model.model.language_model.layers: + layers.append(layer) + for layer in model.model.vision_tower.transformer.layers: + layers.append(layer) + num_attention_heads = model.model.language_model.config.num_attention_heads + num_key_value_heads = model.model.language_model.config.num_key_value_heads + + elif model_cls == Llama4ForConditionalGeneration: + layers: list = [] + for layer in model.language_model.model.layers: + layers.append(layer) + for layer in model.vision_model.model.layers: + layers.append(layer) + num_attention_heads = model.language_model.model.config.num_attention_heads + num_key_value_heads = model.language_model.model.config.num_key_value_heads + else: + # this is the default case for all other models (assumed to be a causal LM) layers: torch.nn.ModuleList = model.model.layers # type: ignore num_attention_heads = model.config.num_attention_heads num_key_value_heads = model.config.num_key_value_heads @@ -471,6 +661,25 @@ def _parallelize_model( for i in range(len(layers)): layers[i].mlp = checkpoint_wrapper(layers[i].mlp) # type: ignore + """ + the extra memory overhead for layer norm seems to be only present + in mistral models, where some intermediate state is converted to float32 + + need to 
find a better solution for checkpointing + """ + if hasattr(layers[i], "self_attn"): + layers[i].self_attn = checkpoint_wrapper(layers[i].self_attn) # type: ignore + + if hasattr(layers[i], "input_layernorm"): + layers[i].input_layernorm = checkpoint_wrapper( + layers[i].input_layernorm # type: ignore + ) + + if hasattr(layers[i], "post_attention_layernorm"): + layers[i].post_attention_layernorm = checkpoint_wrapper( + layers[i].post_attention_layernorm # type: ignore + ) + mp_policy = MixedPrecisionPolicy( param_dtype=param_dtype, reduce_dtype=torch.float32, @@ -510,7 +719,6 @@ def clip_grad_by_total_norm_( parameters: Union[list[Union[torch.Tensor, DTensor]], Union[torch.Tensor, DTensor]], max_grad_norm: Union[int, float], total_norm: float, - dtype: torch.dtype = torch.float32, ): """Clips gradient of an iterable of parameters by total norm. @@ -528,17 +736,17 @@ def clip_grad_by_total_norm_( if isinstance(parameters, (torch.Tensor, DTensor)): parameters = [parameters] - # Grads. - grads = [ - to_local_if_dtensor(p.grad.detach()).to(dtype) - for p in parameters - if p.grad is not None - ] - # Scale. clip_coeff = max_grad_norm / (total_norm + 1.0e-6) if clip_coeff < 1.0: + # Grads. + grads = [ + to_local_if_dtensor(p.grad.detach()) + for p in parameters + if p.grad is not None + ] + for g in grads: g.mul_(clip_coeff) @@ -572,9 +780,7 @@ def get_grad_norm( # Grads. grads_for_norm = [ - to_local_if_dtensor(p.grad.detach()).to(dtype) - for p in parameters - if p.grad is not None + to_local_if_dtensor(p.grad.detach()) for p in parameters if p.grad is not None ] # Norm parameters. @@ -584,9 +790,7 @@ def get_grad_norm( # Calculate norm. 
if norm_type == torch.inf: total_norm = max(grad.abs().max().item() for grad in grads_for_norm) - total_norm_cuda = torch.tensor( - [float(total_norm)], dtype=torch.float, device="cuda" - ) + total_norm_cuda = torch.tensor([float(total_norm)], dtype=dtype, device="cuda") # Take max across all data-parallel GPUs if using FSDP and then all model-parallel GPUs. torch.distributed.all_reduce( total_norm_cuda, op=torch.distributed.ReduceOp.MAX, group=dp_cp_group @@ -598,66 +802,19 @@ def get_grad_norm( total_norm = float(total_norm_cuda[0].item()) else: - total_norm = torch.tensor(0.0, dtype=torch.float32, device="cuda") + total_norm_cuda = torch.tensor(0.0, dtype=dtype, device="cuda") for grad in grads_for_norm: - grad_norm = torch.norm(grad, norm_type) - total_norm += torch.pow(grad_norm, norm_type) + grad_norm = torch.linalg.vector_norm(grad, ord=norm_type, dtype=dtype) + total_norm_cuda += torch.pow(grad_norm, norm_type) # Sum across all data-parallel GPUs if using FSDP and then all model-parallel GPUs. torch.distributed.all_reduce( - total_norm, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group + total_norm_cuda, op=torch.distributed.ReduceOp.SUM, group=dp_cp_group ) torch.distributed.all_reduce( - total_norm, op=torch.distributed.ReduceOp.SUM, group=tp_group + total_norm_cuda, op=torch.distributed.ReduceOp.SUM, group=tp_group ) - total_norm = total_norm.item() ** (1.0 / norm_type) # type: ignore + total_norm = float(total_norm_cuda.item() ** (1.0 / norm_type)) return total_norm - - -def get_logprobs_from_vocab_parallel_logits( - vocab_parallel_logits: DTensor, - input_ids: torch.Tensor | DTensor, - seq_index: Optional[torch.Tensor] = None, -): - """Computes log probabilities from vocabulary-parallel logits. - - This function takes logits that are sharded across the vocabulary dimension (tensor parallel) - and computes the log probabilities for the given input IDs. 
- - Args: - vocab_parallel_logits (DTensor): Logits distributed across tensor parallel workers, - with shape [batch_size, seq_len, vocab_size/tp_size]. - input_ids (torch.Tensor | DTensor): Input token IDs for which to compute log probabilities, - with shape [batch_size, seq_len]. - seq_index (Optional[torch.Tensor]): Sequence index for the input IDs, - with shape [sequence_length]. - - Returns: - torch.Tensor: Log probabilities for the given input IDs. - """ - device_mesh = vocab_parallel_logits.device_mesh - if seq_index is not None: - assert ( - device_mesh.mesh_dim_names is not None - and "cp" in device_mesh.mesh_dim_names - ), "seq_index must be provided for cp sharded logits" - - tp_size = 1 - - tp_group = device_mesh.get_group("tp") - tp_rank = tp_group.rank() - tp_size = tp_group.size() - - vocab_interval_per_rank = vocab_parallel_logits.shape[-1] // tp_size - - return dtensor_from_parallel_logits_to_logprobs( - vocab_parallel_logits.to_local(), - input_ids, - vocab_interval_per_rank * tp_rank, - (tp_rank + 1) * vocab_interval_per_rank, - tp_group, - inference_only=not torch.is_grad_enabled(), - seq_index=seq_index, - ) diff --git a/nemo_rl/models/generation/__init__.py b/nemo_rl/models/generation/__init__.py index 6d25872ae5..c50598cb86 100644 --- a/nemo_rl/models/generation/__init__.py +++ b/nemo_rl/models/generation/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import warnings from typing import cast from transformers import PreTrainedTokenizerBase @@ -26,7 +27,13 @@ def configure_generation_config( ) -> GenerationConfig: """Apply specific configurations to generation config.""" # tokenizer setting - config["pad_token_id"] = tokenizer.pad_token_id + if "_pad_token_id" in config: + warnings.warn( + "'_pad_token_id' found in generation config and will be overridden with tokenizer.pad_token_id. 
" + "Note: '_pad_token_id' is intended for internal use and has no effect when set in user-provided configs.", + UserWarning, + ) + config["_pad_token_id"] = tokenizer.pad_token_id if config["stop_token_ids"] is None: config["stop_token_ids"] = [tokenizer.eos_token_id] @@ -36,10 +43,12 @@ def configure_generation_config( # set load_format config["vllm_cfg"]["load_format"] = "auto" if is_eval else "dummy" - # set skip_tokenizer_init - if is_eval or config["stop_strings"] is not None: - config["vllm_cfg"]["skip_tokenizer_init"] = False - else: - config["vllm_cfg"]["skip_tokenizer_init"] = True + # Respect the skip_tokenizer_init setting from the config. VLMs for example, require this to be False. + if "skip_tokenizer_init" not in config["vllm_cfg"]: + # set skip_tokenizer_init + if is_eval or config["stop_strings"] is not None: + config["vllm_cfg"]["skip_tokenizer_init"] = False + else: + config["vllm_cfg"]["skip_tokenizer_init"] = True return config diff --git a/nemo_rl/models/generation/fp8.py b/nemo_rl/models/generation/fp8.py new file mode 100644 index 0000000000..474cf88a46 --- /dev/null +++ b/nemo_rl/models/generation/fp8.py @@ -0,0 +1,555 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os +from dataclasses import dataclass, field +from unittest.mock import patch + +import ray +import torch +from accelerate import init_empty_weights +from transformers import AutoConfig, AutoModel +from vllm.model_executor.layers.linear import LinearBase +from vllm.triton_utils import tl, triton +from vllm.v1.engine.core import EngineCoreProc +from vllm.v1.engine.utils import CoreEngineProcManager + +FP8_BLOCK_QUANT_KWARGS = { + "activation_scheme": "dynamic", + "fmt": "e4m3", + "quant_method": "fp8", + "weight_block_size": [128, 128], +} + + +@dataclass(frozen=True) +class FP8Config: + use_weight_pow2_scale: bool = False + use_activation_pow2_scale: bool = False + num_first_layers_in_bf16: int = 0 + num_last_layers_in_bf16: int = 0 + model_parallel_size: int = None + + +@dataclass() +class FP8State: + # A cache of fp8 parameter names, we can check this cache to see if a + # param name corresponds to a fp8 weight + seen_params: set = field(default_factory=lambda: set()) + fp8_param_names: set = field(default_factory=lambda: set()) + vllm_patches: list = field(default_factory=lambda: []) + + +# Global FP8 config that can be accessed by patched vLLM functions +# initialized by 'init_fp8_cfg()' +global_fp8_config: FP8Config = None +# Global FP8 state that holds runtime fp8 objects +fp8_state: FP8State = FP8State() + +fp8_patches_applied = False + +original_run_engine_core = EngineCoreProc.run_engine_core +original_init = CoreEngineProcManager.__init__ + + +def my_init(*args, **kwargs): + kwargs["vllm_config"].nrl_fp8_cfg = global_fp8_config + return original_init(*args, **kwargs) + + +def my_run_engine_core(*args, **kwargs): + fp8_cfg = kwargs["vllm_config"].nrl_fp8_cfg + del kwargs["vllm_config"].nrl_fp8_cfg + monkey_patch_vllm_ray_executor(fp8_cfg) + return original_run_engine_core(*args, **kwargs) + + +def monkey_patch_vllm_ray_executor(fp8_config): + if fp8_config.model_parallel_size > 1: + # we patch vllm's _run_workers so that before vllm initalizes 
the model on each rank, we execute + # a ray remote that patches each worker with the required fp8 vllm patches + from vllm.v1.executor.ray_distributed_executor import RayDistributedExecutor + + original_run_workers = RayDistributedExecutor._run_workers + + def patched_run_workers(self, *args, **kwargs): + global fp8_patches_applied + if not fp8_patches_applied: + futures = [ + worker.execute_method.remote(apply_fp8_patches, fp8_config) + for worker in self.workers + ] + [ray.get(future) for future in futures] + fp8_patches_applied = True + + return original_run_workers(self, *args, **kwargs) + + RayDistributedExecutor._run_workers = patched_run_workers + else: + # for single gpu there is no ray, so just call the patches + apply_fp8_patches(None, fp8_config) + + global fp8_patches_applied + fp8_patches_applied = True + + +def apply_fp8_patches(self, fp8_config): + global global_fp8_config, fp8_patches_applied + assert not fp8_patches_applied + + global_fp8_config = fp8_config + + # This patch is used to support torch.compile with vllm parameter subclasses, such as + # PerTensorScaleParameter. Because we need weight loaders to update fp8 weights each + # refit, we patch fp8 parameters to have a reference to their weight loader. Eventually + # with pytorch 2.8, parameter subclassing with torch.compile will be natively supported, in + # which this patch can be removed. + func1_path = "vllm.model_executor.layers.quantization.fp8.Fp8LinearMethod.process_weights_after_loading" + patcher1 = patch(func1_path, process_weights_after_loading) + fp8_state.vllm_patches.append(patcher1) + # These patches add support for pow2, e8 dynamic activation scalings factors which are believed to have higher + # SNR compared to plain fp32 scaling factors. This feature is still under active research. 
+    if global_fp8_config.use_activation_pow2_scale:
+        func2_path = "vllm.model_executor.layers.quantization.utils.fp8_utils.per_token_group_quant_fp8"
+        func3_path = "vllm.model_executor.layers.quantization.utils.fp8_utils._per_token_group_quant_fp8"
+        func4_path = "vllm.model_executor.layers.quantization.utils.fp8_utils._per_token_group_quant_fp8_colmajor"
+        patcher2 = patch(func2_path, per_token_group_quant_fp8)
+        patcher3 = patch(func3_path, _per_token_group_quant_fp8)
+        patcher4 = patch(func4_path, _per_token_group_quant_fp8_colmajor)
+        fp8_state.vllm_patches.extend([patcher2, patcher3, patcher4])
+
+    for p in fp8_state.vllm_patches:
+        p.start()
+
+    fp8_patches_applied = True
+
+
+def init_fp8(vllm_cfg, model_name, model_parallel_size):
+    config = AutoConfig.from_pretrained(model_name)
+    if hasattr(config, "num_experts"):
+        assert config.num_experts == 0, (
+            "FP8 generation for MoE models is currently not supported"
+        )
+
+    global global_fp8_config
+    global_fp8_config = FP8Config(
+        use_weight_pow2_scale=vllm_cfg.get("pow2_weight_scaling_factors", False),
+        use_activation_pow2_scale=vllm_cfg.get(
+            "pow2_activation_scaling_factors", False
+        ),
+        num_first_layers_in_bf16=vllm_cfg.get("num_first_layers_in_bf16", 0),
+        num_last_layers_in_bf16=vllm_cfg.get("num_last_layers_in_bf16", 0),
+        model_parallel_size=model_parallel_size,
+    )
+
+    if vllm_cfg.get("use_deep_gemm", False):
+        os.environ["VLLM_USE_DEEP_GEMM"] = "1"
+
+    if vllm_cfg["async_engine"]:
+        # for async engine, vllm spawns a process for each DP, so we patch
+        # vllm so that upon spawning the thread it applies our FP8 patches
+        EngineCoreProc.run_engine_core = my_run_engine_core
+        CoreEngineProcManager.__init__ = my_init
+    else:
+        # if not async, just directly monkey patch the ray executor
+        monkey_patch_vllm_ray_executor(global_fp8_config)
+
+    # create fp8 kwargs for vllm's LLM(...)
+ num_first_layers_in_bf16 = vllm_cfg.get("num_first_layers_in_bf16", 0) + num_last_layers_in_bf16 = vllm_cfg.get("num_last_layers_in_bf16", 0) + fp8_block_quant_kwargs = dict(FP8_BLOCK_QUANT_KWARGS) + + if num_first_layers_in_bf16 > 0 or num_last_layers_in_bf16 > 0: + with init_empty_weights(): + model = AutoModel.from_config(config) + param_names = [name for name, _ in model.named_parameters()] + + bf16_params = [] + if num_first_layers_in_bf16 > 0: + layers = [l for l in range(num_first_layers_in_bf16)] + bf16_params.append(_get_params_in_layers(param_names, layers)) + + if num_last_layers_in_bf16 > 0: + layers = [ + l + for l in range( + config.num_hidden_layers - num_last_layers_in_bf16, + config.num_hidden_layers, + ) + ] + bf16_params.append(_get_params_in_layers(param_names, layers)) + + fp8_block_quant_kwargs["ignored_layers"] = bf16_params + + vllm_kwargs = { + "quantization": "fp8", + "hf_overrides": {"quantization_config": fp8_block_quant_kwargs}, + } + return vllm_kwargs + + +def is_fp8_model(vllm_config): + from vllm.model_executor.layers.quantization.fp8 import Fp8Config + + if hasattr(vllm_config, "quant_config") and isinstance( + vllm_config.quant_config, Fp8Config + ): + assert vllm_config.quant_config.weight_block_size is not None, ( + "Only block scaling is currently supported in NeMo-RL!" + ) + return True + + return False + + +def _get_params_in_layers(param_names, layers): + layer_templates = [] + for i in layers: + # Prefixes used by huggingface model transformer layers. + # We'll use these to match against the parameter names to determine + # which layer the parameter is in. 
+ layer_templates.extend( + [ + f"transformer.h.{i}.", + f"layers.{i}.", + f"layer.{i}.", + ] + ) + prefixes = [p for p in layer_templates if any(p in n for n in param_names)] + if len(prefixes) == 0: + raise ValueError(f"Could not identify layers {layers} for model.") + + params = [] + for name in param_names: + if ( + any(p in name for p in prefixes) + and "bias" not in name + and "layernorm" not in name + ): + # Convert the param name into vllm's module name + # Vllm wraps the model with an extra 'model' + params.append(f"model.{name}".removesuffix(".weight")) + return params + + +def _get_module_from_param_name(model, name: str): + # Split the name into parts (e.g., 'layers', '0', 'self_attn', 'q_proj', 'weight') + # The module path is all but the last part (the parameter's own name) + path_parts = name.split(".") + module_path = path_parts[:-1] + # Replace with the fused model name + packed_modules_mapping = model.packed_modules_mapping + reversed_mapping = { + original_name: fused_name + for fused_name, original_names_list in packed_modules_mapping.items() + for original_name in original_names_list + } + if module_path[-1] in reversed_mapping.keys(): + module_path[-1] = reversed_mapping[module_path[-1]] + + current_module = model + try: + # Traverse the model hierarchy + for part in module_path: + if isinstance(current_module, torch.nn.ModuleList): + current_module = current_module[int(part)] + else: + current_module = getattr(current_module, part) + except (AttributeError, IndexError, ValueError) as e: + print(f"Warning: Could not find module for parameter '{name}'. 
Error: {e}") + return current_module + + +def _is_fp8_weight(name, model): + if name not in fp8_state.seen_params: + fp8_state.seen_params.add(name) + # Filter out bias params + if name.endswith("weight"): + module = _get_module_from_param_name(model, name) + # We currently only quantize linear layers + if ( + isinstance(module, LinearBase) + and module.weight.dtype == torch.float8_e4m3fn + ): + fp8_state.fp8_param_names.add(name) + return name in fp8_state.fp8_param_names + + +def load_weights(weights, model_runner): + weights_quantized = [] + model = model_runner.model + + for k, v in weights: + if not _is_fp8_weight(k, model): + weights_quantized.append((k, v)) + continue + # Cast the weight into fp8 and its scale factor + param_lp, param_scale = cast_tensor_to_fp8_blockwise( + v.to(torch.float), + weight_block_size=FP8_BLOCK_QUANT_KWARGS["weight_block_size"], + ) + param_scale = torch.squeeze(param_scale, dim=-1) + weights_quantized.append([k, param_lp]) + weights_quantized.append([k + "_scale_inv", param_scale]) + # Finally load the weights into vllm + model.load_weights(weights_quantized) + + +def cast_tensor_to_fp8_blockwise( + data_hp, + weight_block_size, +): + assert len(data_hp.shape) == 2, "Only 2d input tensor is supported" + + block_size1 = weight_block_size[1] + block_size0 = weight_block_size[0] + shape_before_padding = data_hp.shape + # pad data_hp to make its shape a multiple of weight_block_size with the last element of data_hp + if data_hp.shape[1] % block_size1 != 0 or data_hp.shape[0] % block_size0 != 0: + pad1 = ( + 0 + if data_hp.shape[1] % block_size1 == 0 + else block_size1 - data_hp.shape[1] % block_size1 + ) + pad0 = ( + 0 + if data_hp.shape[0] % block_size0 == 0 + else block_size0 - data_hp.shape[0] % block_size0 + ) + print( + f"Padding data_hp from {data_hp.shape} to {(data_hp.shape[0] + pad0, data_hp.shape[1] + pad1)}" + ) + data_hp = torch.nn.functional.pad( + data_hp, (0, pad1, 0, pad0), mode="constant", value=data_hp[-1, -1] + ) + 
+ # FP8 + max_dtype = torch.finfo(torch.float8_e4m3fn).max + + original_shape = data_hp.shape + blk_m, blk_n = data_hp.shape[0] // block_size0, data_hp.shape[1] // block_size1 + + assert block_size1 == block_size0 + data_hp = data_hp.reshape(blk_m, block_size0, blk_n, block_size1) + + # Permute to (BLK_M, BLK_N, BLOCK_SIZE_M, BLOCK_SIZE_N) + data_hp = data_hp.permute(0, 2, 1, 3) + # Flatten to (BLK_M, BLK_N, BLOCK_SIZE_M * BLOCK_SIZE_N) + data_hp = data_hp.to(torch.float32).contiguous().flatten(start_dim=2) + + # Calculate max absolute value per block + max_abs = torch.amax(torch.abs(data_hp), dim=-1, keepdim=True) + # Calculate descale factor + descale = max_abs / max_dtype + + global global_fp8_config + if global_fp8_config.use_weight_pow2_scale: + exponent = torch.ceil(torch.log2(descale)) + # Post process exponent to be in range of -127 to 127 and to be E8M0 biased + exponent = torch.clamp(exponent, min=-127, max=127) + 127 + # Convert to uint8 container + exponent = exponent.to(torch.uint8) + # Calculate descale_fp to apply to data_hp + scale_fp = torch.where( + # If exponent is 0, descale_fp is 1.0 rather than 2^127 + exponent == 0, + 1.0, + torch.exp2(127 - exponent.to(torch.float32)), + ) + descale_fp = torch.reciprocal(scale_fp) + else: + scale_fp = max_dtype / max_abs + scale_fp = torch.where(max_abs == 0, 1.0, scale_fp) + # preserve the behavior for 0 amax case + scale_fp = torch.where(max_abs == torch.inf, 1.0, scale_fp) + + descale_fp = torch.reciprocal(scale_fp) + + # Scale and saturate cast the data elements to max of target dtype + data_lp = torch.clamp(data_hp * scale_fp, min=-1 * max_dtype, max=max_dtype) + + fp_data = data_lp.to(torch.float8_e4m3fn) + + # (BLK_M, BLK_N, BLOCK_SIZE_M * BLOCK_SIZE_N) to (M, N) + fp_data = ( + fp_data.reshape(blk_m, blk_n, block_size0, block_size1) + .permute(0, 2, 1, 3) + .reshape(original_shape) + ) + + # remove the padding + if data_hp.shape != shape_before_padding: + fp_data = fp_data[: shape_before_padding[0], 
: shape_before_padding[1]] + + # Convert to target format, but still in original precision container + return fp_data, descale_fp + + +def process_weights_after_loading(self, layer) -> None: + from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + maybe_post_process_fp8_weight_block, + process_fp8_weight_block_strategy, + ) + + assert self.block_quant and self.quant_config.is_checkpoint_fp8_serialized + assert self.quant_config.activation_scheme == "dynamic" + + weight_scale = layer.weight_scale_inv + weight, weight_scale = process_fp8_weight_block_strategy(layer.weight, weight_scale) + layer.weight.data = weight.data + if hasattr(layer, "weight_scale"): + # Not the first time to call this function, just need to update the data + layer.weight_scale.data = weight_scale.data + else: + # The first time to call this function, create a new parameter and update the tp status + layer.weight_scale = torch.nn.Parameter(weight_scale.data, requires_grad=False) + layer.update_param_tp_status() + + maybe_post_process_fp8_weight_block(layer, self.cutlass_block_fp8_supported) + + +@triton.jit +def _per_token_group_quant_fp8( + # Pointers to inputs and output + y_ptr, + y_q_ptr, + y_s_ptr, + group_size, + # Num columns of y + y_num_columns, + y_row_stride, + # Avoid to divide zero + eps, + # Information for float8 + fp8_min, + fp8_max, + # Meta-parameters + BLOCK: tl.constexpr, +): + groups_per_row = y_num_columns // group_size + + # Map the program id to the row of X and Y it should compute. 
+ g_id = tl.program_id(0) + row = g_id // groups_per_row + row_g_id = g_id % groups_per_row + + y_ptr += (row * y_row_stride) + (row_g_id * group_size) + y_q_ptr += g_id * group_size + y_s_ptr += g_id + + cols = tl.arange(0, BLOCK) # N <= BLOCK + mask = cols < group_size + + y = tl.load(y_ptr + cols, mask=mask, other=0.0).to(tl.float32) + # Quant + _absmax = tl.maximum(tl.max(tl.abs(y)), eps) + + # pow2_scale + inv_scale = fp8_max / _absmax + exponent = tl.floor(tl.log2(inv_scale)) + # exponent is an integer + exponent = tl.minimum(exponent, 126.0) + + # after rounding to exponent, round back to floating + inv_scale_pow2 = tl.exp2(exponent) + + is_nan = inv_scale_pow2 != inv_scale_pow2 + is_inf = (inv_scale_pow2 == 1.0 / 0.0) | (inv_scale_pow2 == -1.0 / 0.0) + + # If the value is NaN or infinity, default it to 1.0, + # otherwise keep its original value. + inv_scale_pow2 = tl.where(is_nan | is_inf, 1.0, inv_scale_pow2) + # finally uninverse + y_s = 1.0 / inv_scale_pow2 + + y_q = tl.clamp(y / y_s, fp8_min, fp8_max).to(y_q_ptr.dtype.element_ty) + + tl.store(y_q_ptr + cols, y_q, mask=mask) + tl.store(y_s_ptr, y_s) + + +@triton.jit +def _per_token_group_quant_fp8_colmajor( + # Pointers to inputs and output + y_ptr, + y_q_ptr, + y_s_ptr, + group_size, + # Num columns of y + y_num_columns, + y_row_stride, + # Stride from one column to the next of y_s + y_s_col_stride, + # Avoid to divide zero + eps, + # Information for float8 + fp8_min, + fp8_max, + # Meta-parameters + BLOCK: tl.constexpr, +): + groups_per_row = y_num_columns // group_size + + # Map the program id to the row of X and Y it should compute. 
+ g_id = tl.program_id(0) + row = g_id // groups_per_row + row_g_id = g_id % groups_per_row + + y_ptr += (row * y_row_stride) + (row_g_id * group_size) + y_q_ptr += g_id * group_size + + # Convert g_id the flattened block coordinate to 2D so we can index + # into the output y_scales matrix + blocks_per_row = y_num_columns // group_size + scale_col = g_id % blocks_per_row + scale_row = g_id // blocks_per_row + y_s_ptr += scale_col * y_s_col_stride + scale_row + + cols = tl.arange(0, BLOCK) # group_size <= BLOCK + mask = cols < group_size + + y = tl.load(y_ptr + cols, mask=mask, other=0.0).to(tl.float32) + _absmax = tl.maximum(tl.max(tl.abs(y)), eps) + + # Quant pow2_scale: + inv_scale = fp8_max / _absmax + # calculate the nearest pow2 integer + exponent = tl.floor(tl.log2(inv_scale)) + exponent = tl.minimum(exponent, 126.0) + # round inv_scale to the nearest pow2 with the exp we just calculated + inv_scale_pow2 = tl.exp2(exponent) + # If the value is NaN or infinity, default it to 1.0, + # otherwise keep its original value. 
+ is_nan = inv_scale_pow2 != inv_scale_pow2 + is_inf = (inv_scale_pow2 == float("inf")) | (inv_scale_pow2 == float("-inf")) + inv_scale_pow2 = tl.where(is_nan | is_inf, 1.0, inv_scale_pow2) + # finally uninverse + y_s = 1.0 / inv_scale_pow2 + + y_q = tl.clamp(y / y_s, fp8_min, fp8_max).to(y_q_ptr.dtype.element_ty) + + tl.store(y_q_ptr + cols, y_q, mask=mask) + tl.store(y_s_ptr, y_s) + + +def per_token_group_quant_fp8( + *args, + **kwargs, +) -> tuple[torch.Tensor, torch.Tensor]: + assert global_fp8_config.use_activation_pow2_scale + from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + per_token_group_quant_fp8 as vllm_per_token_group_quant_fp8, + ) + + return vllm_per_token_group_quant_fp8(*args, **kwargs) diff --git a/nemo_rl/models/generation/interfaces.py b/nemo_rl/models/generation/interfaces.py index d424c7c1df..f7f58b383f 100644 --- a/nemo_rl/models/generation/interfaces.py +++ b/nemo_rl/models/generation/interfaces.py @@ -104,9 +104,15 @@ class ResourcesConfig(TypedDict): num_nodes: int +class OptionalResourcesConfig(TypedDict): + # Same as ResourcesConfig, but fields can be null and are validated in grpo.py + gpus_per_node: int | None + num_nodes: int | None + + class ColocationConfig(TypedDict): enabled: bool - resources: NotRequired[ResourcesConfig] + resources: OptionalResourcesConfig class GenerationConfig(TypedDict): @@ -116,12 +122,13 @@ class GenerationConfig(TypedDict): max_new_tokens: int temperature: float top_p: float - top_k: int - model_name: str - stop_token_ids: list[int] - stop_strings: NotRequired[list[str]] - pad_token_id: NotRequired[int] + top_k: int | None + model_name: NotRequired[str] # Not Required b/c GRPO writes this + stop_token_ids: list[int] | None + stop_strings: list[str] | None colocated: NotRequired[ColocationConfig] + # This isn't meant to be passed by the user, but is populated by nemo_rl.models.generation.__init__.configure_generation_config + _pad_token_id: NotRequired[int] class 
GenerationDatumSpec(TypedDict): @@ -233,10 +240,15 @@ def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: """Prepare the info for refit.""" raise NotImplementedError - def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: + def update_weights_via_ipc_zmq(self) -> list[ray.ObjectRef]: """Update the model weights from the given IPC handles.""" raise NotImplementedError def update_weights_from_collective(self) -> list[ray.ObjectRef]: """Update the model weights from collective communication.""" raise NotImplementedError + + # Optional hook; backends may override to invalidate any reusable caches + # (e.g., vLLM prefix/KV caches) after weight updates. + def invalidate_kv_cache(self) -> bool: + return False diff --git a/nemo_rl/models/generation/vllm.py b/nemo_rl/models/generation/vllm.py deleted file mode 100644 index c26109fc93..0000000000 --- a/nemo_rl/models/generation/vllm.py +++ /dev/null @@ -1,2020 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import asyncio -import copy -import gc -import os -import sys -import uuid -from collections import defaultdict -from typing import ( - Any, - AsyncGenerator, - NotRequired, - Optional, - TypedDict, - Union, - cast, -) - -import numpy as np -import ray -import torch -from ray.util.placement_group import PlacementGroup - -from nemo_rl.distributed.batched_data_dict import BatchedDataDict, SlicedDataDict -from nemo_rl.distributed.named_sharding import NamedSharding -from nemo_rl.distributed.virtual_cluster import ( - RayVirtualCluster, -) -from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches -from nemo_rl.distributed.worker_groups import ( - RayWorkerBuilder, - RayWorkerGroup, -) -from nemo_rl.models.generation.interfaces import ( - GenerationConfig, - GenerationDatumSpec, - GenerationInterface, - GenerationOutputSpec, - verify_right_padding, -) -from nemo_rl.models.huggingface.common import ModelFlag -from nemo_rl.models.policy.utils import is_vllm_v1_engine_enabled - - -class VllmSpecificArgs(TypedDict): - tensor_parallel_size: int - pipeline_parallel_size: int - gpu_memory_utilization: float - max_model_len: int - # Additional arguments for vLLM inserted by nemo rl based on the context of when vllm is used - skip_tokenizer_init: bool - async_engine: bool - load_format: NotRequired[str] - precision: NotRequired[str] - enforce_eager: NotRequired[bool] - - -class VllmConfig(GenerationConfig): - vllm_cfg: VllmSpecificArgs - vllm_kwargs: NotRequired[dict[str, Any]] - - -@ray.remote( - runtime_env={**get_nsight_config_if_pattern_matches("vllm_generation_worker")} -) # pragma: no cover -class VllmGenerationWorker: - def __repr__(self) -> str: - """Customizes the actor's prefix in the Ray logs. - - This makes it easier to identify which worker is producing specific log messages. 
- """ - return f"{self.__class__.__name__}" - - @staticmethod - def configure_worker( - num_gpus: int | float, bundle_indices: Optional[tuple[int, list[int]]] = None - ) -> tuple[dict[str, Any], dict[str, str], dict[str, Any]]: - """Provides complete worker configuration for vLLM tensor and pipeline parallelism. - - This method configures the worker based on its role in tensor and pipeline parallelism, - which is determined directly from the bundle_indices parameter. - - Args: - num_gpus: Original GPU allocation for this worker based on the placement group - bundle_indices: Tuple of (node_idx, local_bundle_indices) for parallelism (if applicable) - - Returns: - tuple with complete worker configuration: - - 'resources': Resource allocation (e.g., num_gpus) - - 'env_vars': Environment variables for this worker - - 'init_kwargs': Parameters to pass to __init__ of the worker - """ - # Initialize configuration - resources: dict[str, Any] = {"num_gpus": num_gpus} - init_kwargs: dict[str, Any] = {} - env_vars: dict[str, str] = {} - - local_bundle_indices = None - if bundle_indices is not None: - node_idx = bundle_indices[0] - local_bundle_indices = bundle_indices[1] - init_kwargs["bundle_indices"] = local_bundle_indices - - """ - compute a unique seed from the node_idx and bundle_indices: - node_idx = 0, bundle_indices = [0, 1, 2, 3] -> seed = 0*1024 + 0 - node_idx = 0, bundle_indices = [4, 5, 6, 7] -> seed = 0*1024 + 1 - node_idx = 1, bundle_indices = [0, 1, 2, 3] -> seed = 1*1024 + 0 - node_idx = 1, bundle_indices = [4, 5, 6, 7] -> seed = 1*1024 + 1 - """ - # For single worker groups, use a simpler seed calculation - if len(local_bundle_indices) == 1: - seed = node_idx * 1024 + local_bundle_indices[0] - else: - # For parallel groups, use the original calculation - bundle_id = local_bundle_indices[0] // len(local_bundle_indices) - seed = node_idx * 1024 + bundle_id - - init_kwargs["seed"] = seed - # Need to give each DP group its own vllm cache to address: - # 
https://github.com/vllm-project/vllm/issues/18851 - env_vars["VLLM_CACHE_ROOT"] = os.path.expanduser(f"~/.cache/vllm_{seed}") - - # Check if this worker is part of a parallel group (TP or TP+PP). - # A worker is part of a parallel group if it's a secondary member (local_bundle_indices is None) - # or if it's a primary member of a group with multiple workers. - is_part_of_parallel_workers = ( - local_bundle_indices is not None and len(local_bundle_indices) > 1 - ) or local_bundle_indices is None - - if is_part_of_parallel_workers: - # Ray + vllm likes to manage GPU assignment internally for parallel groups - resources["num_gpus"] = 0 - env_vars["RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES"] = "1" - init_kwargs["fraction_of_gpus"] = num_gpus - - env_vars["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" - # Skip vllm P2P check and rely on driver to report peer to peer capability. - env_vars["VLLM_SKIP_P2P_CHECK"] = "1" - - return resources, env_vars, init_kwargs - - def __init__( - self, - config: VllmConfig, - bundle_indices: Optional[list[int]] = None, - fraction_of_gpus: float = 1.0, - seed: Optional[int] = None, - ): - """Initialize a vLLM worker for distributed inference. - - Args: - config: Configuration dictionary for the policy - bundle_indices: List of local bundle indices within a node for parallelism. - Only needed for the first worker in each tied worker group. 
- fraction_of_gpus: Fraction of GPUs to use for this worker - seed: Random seed for initialization - """ - self.cfg = config - - self.model_name = self.cfg["model_name"] - self.tensor_parallel_size = self.cfg["vllm_cfg"]["tensor_parallel_size"] - self.pipeline_parallel_size = self.cfg["vllm_cfg"]["pipeline_parallel_size"] - self.gpu_memory_utilization = self.cfg["vllm_cfg"]["gpu_memory_utilization"] - self.fraction_of_gpus = fraction_of_gpus - self.is_model_owner = bundle_indices is not None - - # Store the Python executable being used by this worker - self.py_executable = sys.executable - - # Skip model loading if we're not the model owner - if not self.is_model_owner: - self.llm = None - self.tokenizer = None - self.rank = 0 - self.world_size = 1 - return - - # In Ray+vLLM setup, each worker process considers itself rank 0 - # vLLM handles the parallelism internally through Ray - self.rank = 0 - self.world_size = 1 - - # Monkey patch for vLLM to ensure RAY_ADDRESS is set in Ray actors. - try: - import vllm.utils - from vllm.logger import init_logger - from vllm.utils import cuda_is_initialized, is_in_ray_actor - - logger = init_logger("vllm_patch") - - def _patched_maybe_force_spawn(): - """Patched version of vllm.utils._maybe_force_spawn. - - This patch changes an `elif is_in_ray_actor()` to an `if` statement. - This ensures that `os.environ["RAY_ADDRESS"]` is set when running - within a Ray actor, even if CUDA has already been initialized. - This is crucial for vLLM workers to connect back to the Ray cluster. - """ - if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") == "spawn": - return - - reason = None - if cuda_is_initialized(): - reason = "CUDA is initialized" - - if is_in_ray_actor(): - # even if we choose to spawn, we need to pass the ray address - # to the subprocess so that it knows how to connect to the ray cluster. - # env vars are inherited by subprocesses, even if we use spawn. 
- import ray - - os.environ["RAY_ADDRESS"] = ray.get_runtime_context().gcs_address - if reason is None: - reason = "In a Ray actor and can only be spawned" - - if reason is not None: - logger.warning( - "We must use the `spawn` multiprocessing start method. " - "Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. " - "See https://docs.vllm.ai/en/latest/getting_started/" - "troubleshooting.html#python-multiprocessing " - "for more information. Reason: %s", - reason, - ) - os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn" - - vllm.utils._maybe_force_spawn = _patched_maybe_force_spawn - logger.info("Successfully patched vllm.utils._maybe_force_spawn.") - - def _patch_vllm_init_workers_ray(): - # Patch the vLLM ray_distributed_executor.py file to pass custom runtime_env in _init_workers_ray call. - # This allows passing custom py_executable to worker initialization. - - try: - import vllm.executor.ray_distributed_executor as ray_executor_module - - file_to_patch = ray_executor_module.__file__ - - with open(file_to_patch, "r") as f: - content = f.read() - - old_line = "self._init_workers_ray(placement_group)" - new_line = f'self._init_workers_ray(placement_group, runtime_env={{"py_executable": "{self.py_executable}"}})' - - if new_line in content: - return - - if old_line not in content: - return - - patched_content = content.replace(old_line, new_line) - - # Write back the patched content - with open(file_to_patch, "w") as f: - f.write(patched_content) - - except (ImportError, FileNotFoundError, PermissionError): - # Allow failures gracefully - pass - - _patch_vllm_init_workers_ray() - - except (ImportError, AttributeError): - # vllm not installed or has a different structure, skipping patch. - pass - - try: - import vllm - - self.SamplingParams = vllm.SamplingParams - except ImportError: - raise ImportError( - "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " - "covers the vllm dependency. 
You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " - "If you are working interactively, you can install by running `uv sync --extra vllm` anywhere in the repo." - ) - vllm_kwargs: dict[str, Any] = copy.deepcopy(self.cfg.get("vllm_kwargs", {})) - - # Calculate total parallel size (TP * PP) - model_parallel_size = self.tensor_parallel_size * self.pipeline_parallel_size - - # Special handling for parallel case (either TP or PP or both) - if model_parallel_size > 1: - # Configure vLLM for tensor/pipeline parallelism within Ray - # Reset CUDA_VISIBLE_DEVICES to allow vLLM to manage GPU assignment - os.environ.pop("CUDA_VISIBLE_DEVICES", None) - os.environ["VLLM_RAY_PER_WORKER_GPUS"] = str( - self.fraction_of_gpus / model_parallel_size - ) - - # Set bundle indices for parallel workers - bundle_indices_str = ",".join(map(str, bundle_indices)) - os.environ["VLLM_RAY_BUNDLE_INDICES"] = bundle_indices_str - print( - f"VLLM_RAY_BUNDLE_INDICES environment variable set to: {os.environ.get('VLLM_RAY_BUNDLE_INDICES')}" - ) - - # Use Ray for distributed execution in parallel mode - vllm_kwargs["distributed_executor_backend"] = "ray" - else: - # For non-parallel mode, explicitly set executor to None to avoid Ray issues - vllm_kwargs["distributed_executor_backend"] = None - - os.environ["VLLM_USE_V1"] = "1" if is_vllm_v1_engine_enabled() else "0" - os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1" - - load_format = self.cfg["vllm_cfg"]["load_format"] - if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(self.model_name): - load_format = "auto" - - llm_kwargs = dict( - model=self.model_name, - load_format=load_format, - skip_tokenizer_init=self.cfg["vllm_cfg"]["skip_tokenizer_init"], - tensor_parallel_size=self.tensor_parallel_size, - pipeline_parallel_size=self.pipeline_parallel_size, - gpu_memory_utilization=self.gpu_memory_utilization, - enable_prefix_caching=torch.cuda.get_device_capability()[0] >= 8, - dtype=self.cfg["vllm_cfg"]["precision"], - seed=seed, 
- enforce_eager=self.cfg["vllm_cfg"]["enforce_eager"], - max_model_len=self.cfg["vllm_cfg"]["max_model_len"], - trust_remote_code=True, - worker_extension_cls="nemo_rl.models.generation.vllm_backend.VllmInternalWorkerExtension", - enable_sleep_mode=True, - disable_log_stats=True, - **vllm_kwargs, - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - from vllm.engine.arg_utils import AsyncEngineArgs - from vllm.v1.engine.async_llm import AsyncLLM - - self.llm = AsyncLLM.from_engine_args(AsyncEngineArgs(**llm_kwargs)) - else: - self.llm = vllm.LLM(**llm_kwargs) - - # will be initialized in post_init - # used in update_weights_from_ipc_handles - self.vllm_device_ids = None - - def post_init(self): - self.vllm_device_ids = self.report_device_id() - - async def post_init_async(self): - self.vllm_device_ids = await self.report_device_id_async() - - def init_collective( - self, rank_prefix: int, ip: str, port: int, world_size: int - ) -> None: - self.llm.collective_rpc( - "init_collective", - args=( - rank_prefix, - ip, - port, - world_size, - ), - ) - - async def init_collective_async( - self, rank_prefix: int, ip: str, port: int, world_size: int - ) -> None: - await self.llm.collective_rpc( - "init_collective", - args=( - rank_prefix, - ip, - port, - world_size, - ), - ) - - def llm(self): - return self.llm - - def is_alive(self): - """Check if the worker is alive.""" - return True - - def _merge_stop_strings(self, batch_stop_strings): - stop_set: set[str] = set() - - if self.cfg.get("stop_strings"): - stop_set.update(self.cfg["stop_strings"]) - - if batch_stop_strings is not None: - for sample_ss in batch_stop_strings: - if sample_ss: - stop_set.update(sample_ss) - - return list(stop_set) if stop_set else None - - def _build_sampling_params( - self, - *, - greedy: bool, - stop_strings, - max_new_tokens: Optional[int] = None, - ): - top_k_cfg = self.cfg["top_k"] - top_k_val = 1 if greedy else (top_k_cfg if top_k_cfg is not None else -1) - - temperature = 0.0 if greedy else 
self.cfg["temperature"] - - max_tokens = ( - max_new_tokens if max_new_tokens is not None else self.cfg["max_new_tokens"] - ) - - return self.SamplingParams( - temperature=temperature, - top_p=self.cfg["top_p"], - top_k=top_k_val, - max_tokens=max_tokens, - logprobs=0, - stop_token_ids=self.cfg["stop_token_ids"], - stop=stop_strings, - include_stop_str_in_output=True, - ) - - def generate( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> BatchedDataDict[GenerationOutputSpec]: - """Generate a batch of data using vLLM generation. - - Args: - data: BatchedDataDict containing input_ids and input_lengths tensors - greedy: Whether to use greedy decoding instead of sampling - - Returns: - BatchedDataDict conforming to GenerationOutputSpec: - - output_ids: input + generated token IDs with proper padding - - logprobs: Log probabilities for tokens - - generation_lengths: Lengths of each response - - unpadded_sequence_lengths: Lengths of each input + generated sequence - """ - # Handle empty input case - if len(data["input_ids"]) == 0: - # Return empty BatchedDataDict with all required fields - return BatchedDataDict[GenerationOutputSpec]( - { - "output_ids": torch.zeros((0, 0), dtype=torch.long), - "logprobs": torch.zeros((0, 0), dtype=torch.float), - "generation_lengths": torch.zeros(0, dtype=torch.long), - "unpadded_sequence_lengths": torch.zeros(0, dtype=torch.long), - } - ) - - input_ids = data["input_ids"] - input_lengths = data["input_lengths"] - batch_stop_strings: list[list[str]] = data.get("stop_strings", []) - stop_strings = self._merge_stop_strings(batch_stop_strings) - sampling_params = self._build_sampling_params( - greedy=greedy, - stop_strings=stop_strings, - ) - - # verify inputs have correct padding - verify_right_padding(data, pad_value=self.cfg["pad_token_id"]) - - # Convert inputs to vLLM format - batch_size = input_ids.shape[0] - # Original input length with padding - padded_input_length = input_ids.size(1) - - # Prepare 
prompts for vLLM (removing padding) - prompts = [] - - for i in range(batch_size): - # Use input_lengths to get only valid tokens (not padding) - valid_length = input_lengths[i].item() - valid_ids = ( - input_ids[i, :valid_length] if valid_length > 0 else input_ids[i, :0] - ) - token_ids = valid_ids.tolist() - - prompts.append({"prompt_token_ids": token_ids}) - - # Generate outputs - assert self.llm is not None, ( - "Attempting to generate with either an uninitialized vLLM or non-model-owner" - ) - outputs = self.llm.generate(prompts, sampling_params) - - # Process the outputs - but preserve the original input padding structure - output_ids_list = [] - logprobs_list = [] - generation_lengths = [] - unpadded_sequence_lengths = [] - max_length = 0 - for output in outputs: - max_length = max(max_length, len(output.outputs[0].token_ids)) - - for i, output in enumerate(outputs): - # Extract generated tokens - sequence_length = input_lengths[i] - generation = output.outputs[0] - generated_tokens = list(generation.token_ids) - - # Calculate total sequence length (original input length + generated tokens) - total_length = padded_input_length + max_length - - # Create a new tensor with the right size and fill with padding token - full_output = torch.full( - (total_length,), self.cfg["pad_token_id"], dtype=input_ids.dtype - ) - - # Copy original input (with padding) into the beginning - full_output[:sequence_length] = input_ids[i][:sequence_length] - - # Add generated tokens after the original input - full_output[sequence_length : sequence_length + len(generated_tokens)] = ( - torch.tensor(generated_tokens) - ) - - output_ids_list.append(full_output) - full_logprobs = torch.zeros(total_length, dtype=torch.float32) - if hasattr(generation, "logprobs") and generation.logprobs: - try: - for idx, logprob_dict in enumerate(generation.logprobs): - if logprob_dict: - position = sequence_length + idx - full_logprobs[position] = next(iter(logprob_dict.items()))[ - 1 - ].logprob - 
except Exception: - import traceback - - traceback.print_exc() - - logprobs_list.append(full_logprobs) - - response_length = sequence_length + len(generated_tokens) - generation_lengths.append(len(generated_tokens)) - unpadded_sequence_lengths.append(response_length) - assert response_length <= self.llm.llm_engine.model_config.max_model_len, ( - f"response_length={response_length} > max_model_len={self.llm.llm_engine.model_config.max_model_len}, which should not happen. Please check this behavior in isolation by running `uv run --extra vllm tools/model_diagnostics/1.max_model_len_respected.py {self.llm.llm_engine.model_config.model}` and raise this issue with the vllm team." - ) - - # Create return data conforming to GenerationOutputSpec - output_ids = torch.stack(output_ids_list) - logprobs = torch.stack(logprobs_list) - - return_data = BatchedDataDict[GenerationOutputSpec]( - { - "output_ids": output_ids, - "logprobs": logprobs, - "generation_lengths": torch.tensor( - generation_lengths, dtype=torch.long - ), - "unpadded_sequence_lengths": torch.tensor( - unpadded_sequence_lengths, dtype=torch.long - ), - } - ) - - return return_data - - async def generate_async( - self, - data: BatchedDataDict[GenerationDatumSpec], - greedy: bool = False, - ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Generate a batch of data using vLLM's AsyncLLMEngine, yielding results as they are ready. - - Args: - data: BatchedDataDict with input_ids and input_lengths - greedy: Whether to use greedy decoding instead of sampling - - Yields: - Tuple of (original_index, BatchedDataDict conforming to GenerationOutputSpec for the single sequence) - """ - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "generate_async can only be used when async_engine is enabled in vLLM config." 
- ) - - # Handle empty input case - if len(data["input_ids"]) == 0: - return - - verify_right_padding(data, pad_value=self.cfg["pad_token_id"]) - - input_ids_batch = data["input_ids"] - input_lengths_batch = data["input_lengths"] - batch_size = input_ids_batch.shape[0] - - # Ensure generate_async only receives single samples (batch_size = 1) - assert batch_size == 1, ( - f"generate_async is restricted to handle only single samples, " - f"but received batch_size={batch_size}. Please handle batching outside this method." - ) - - batch_specific_stop_strings_list = data.get( - "stop_strings", [[] for _ in range(batch_size)] - ) - - # Create tasks for each sample in the batch - async def process_single_sample(sample_idx): - """Process a single sample and return the result.""" - current_input_actual_length = input_lengths_batch[sample_idx].item() - prompt_token_ids_list = ( - input_ids_batch[sample_idx, :current_input_actual_length].tolist() - if current_input_actual_length > 0 - else [] - ) - prompt = {"prompt_token_ids": prompt_token_ids_list} - - per_sample_stop_strings = None - if batch_specific_stop_strings_list and sample_idx < len( - batch_specific_stop_strings_list - ): - per_sample_stop_strings = batch_specific_stop_strings_list[sample_idx] - - final_stop_strings_for_sample = self._merge_stop_strings( - [per_sample_stop_strings] if per_sample_stop_strings else None - ) - - remaining_ctx = ( - self.cfg["vllm_cfg"]["max_model_len"] - current_input_actual_length - ) - allowed_new_tokens = max(0, min(self.cfg["max_new_tokens"], remaining_ctx)) - - # Handle case where no tokens can be generated due to length constraints - if allowed_new_tokens == 0: - # Access the input data directly from the function parameters - input_ids_single_row = input_ids_batch[sample_idx] - - # Create output tensors with just the input (no generated tokens) - output_ids_single_item_batched = input_ids_single_row[ - :current_input_actual_length - ].unsqueeze(0) - - logprobs_single_item = 
torch.zeros( - (1, current_input_actual_length), - dtype=torch.float32, - device=input_ids_single_row.device, - ) - - generation_lengths_tensor = torch.tensor( - [0], dtype=torch.long, device=input_ids_single_row.device - ) - - unpadded_sequence_lengths_tensor = torch.tensor( - [current_input_actual_length], - dtype=torch.long, - device=input_ids_single_row.device, - ) - - result_batch = BatchedDataDict[GenerationOutputSpec]( - { - "output_ids": output_ids_single_item_batched, - "logprobs": logprobs_single_item, - "generation_lengths": generation_lengths_tensor, - "unpadded_sequence_lengths": unpadded_sequence_lengths_tensor, - } - ) - - return (sample_idx, result_batch) - - sampling_params_for_request = self._build_sampling_params( - greedy=greedy, - stop_strings=final_stop_strings_for_sample, - max_new_tokens=allowed_new_tokens, - ) - - request_id = str(uuid.uuid4()) - - # Generate using vLLM async engine - vllm_request_generator = self.llm.generate( - prompt=prompt, - sampling_params=sampling_params_for_request, - request_id=request_id, - ) - - # Get the final result from the generator - final_request_output = None - async for req_output in vllm_request_generator: - final_request_output = req_output - - if final_request_output is None: - raise RuntimeError(f"No output received for request {request_id}") - - # Process the output - generation_details = final_request_output.outputs[0] - generated_token_ids = list(generation_details.token_ids) - num_generated_tokens = len(generated_token_ids) - - original_input_ids_single_row = input_ids_batch[sample_idx] - final_output_tensor_len = current_input_actual_length + num_generated_tokens - - # Create output_ids tensor for this single item - output_ids_single_item = torch.full( - (final_output_tensor_len,), - self.cfg["pad_token_id"], - dtype=original_input_ids_single_row.dtype, - device=original_input_ids_single_row.device, - ) - # Copy original input (up to its actual length) - 
output_ids_single_item[:current_input_actual_length] = ( - original_input_ids_single_row[:current_input_actual_length] - ) - # Add generated tokens after the actual input - output_ids_single_item[ - current_input_actual_length : current_input_actual_length - + num_generated_tokens - ] = torch.tensor( - generated_token_ids, - dtype=original_input_ids_single_row.dtype, - device=original_input_ids_single_row.device, - ) - - # Reshape to (1, seq_len) for BatchedDataDict - output_ids_single_item_batched = output_ids_single_item.unsqueeze(0) - - # Create logprobs tensor for this single item - logprobs_single_item = torch.zeros( - (1, final_output_tensor_len), - dtype=torch.float32, - device=original_input_ids_single_row.device, - ) - if hasattr(generation_details, "logprobs") and generation_details.logprobs: - for idx, logprob_dict_per_token in enumerate( - generation_details.logprobs - ): - if logprob_dict_per_token and idx < len(generated_token_ids): - token_id_at_idx = generated_token_ids[idx] - if token_id_at_idx in logprob_dict_per_token: - logprob_value = logprob_dict_per_token[ - token_id_at_idx - ].logprob - position_in_output_tensor = ( - current_input_actual_length + idx - ) - if position_in_output_tensor < final_output_tensor_len: - logprobs_single_item[0, position_in_output_tensor] = ( - logprob_value - ) - - # Generation lengths - generation_lengths_tensor = torch.tensor( - [num_generated_tokens], - dtype=torch.long, - device=original_input_ids_single_row.device, - ) - - # Unpadded sequence lengths (actual_input + actual_generated) - unpadded_total_length = current_input_actual_length + num_generated_tokens - unpadded_sequence_lengths_tensor = torch.tensor( - [unpadded_total_length], - dtype=torch.long, - device=original_input_ids_single_row.device, - ) - - result_batch = BatchedDataDict[GenerationOutputSpec]( - { - "output_ids": output_ids_single_item_batched, - "logprobs": logprobs_single_item, - "generation_lengths": generation_lengths_tensor, - 
"unpadded_sequence_lengths": unpadded_sequence_lengths_tensor, - } - ) - - return (sample_idx, result_batch) - - # Create tasks for all samples and yield results as they complete - sample_tasks = [ - asyncio.create_task(process_single_sample(i)) for i in range(batch_size) - ] - - # Yield results as they become available - for completed_task in asyncio.as_completed(sample_tasks): - try: - result = await completed_task - yield result - except Exception as e: - # Cancel remaining tasks - for task in sample_tasks: - if not task.done(): - task.cancel() - await asyncio.gather(*sample_tasks, return_exceptions=True) - raise e - - def generate_text( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> BatchedDataDict[GenerationOutputSpec]: - """Generate text responses using vLLM generation. - - Args: - data: BatchedDataDict containing prompts with text strings - greedy: Whether to use greedy decoding instead of sampling - - Returns: - BatchedDataDict containing: - - texts: List of generated text responses - """ - # Check if async engine is enabled - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "generate_text cannot be used with async_engine=True. Use generate_text_async instead." 
- ) - - # Extract stop_strings if provided, else use default from config - batch_stop_strings: list[list[str] | None] = data.get( - "stop_strings", [self.cfg.get("stop_strings")] * len(data["prompts"]) - ) - - # This function requires all generations have the same stop strings, so we collect all here - stop_strings: set[str] = set() - for sample_stop_strings in batch_stop_strings: - if sample_stop_strings: - stop_strings.update(sample_stop_strings) - - # Add default stop strings from config - if self.cfg.get("stop_strings", None): - stop_strings.update(self.cfg["stop_strings"]) - - stop_strings = list(stop_strings) if len(stop_strings) > 0 else None - - # Read generation parameters from config - top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 - sampling_params = self.SamplingParams( - temperature=self.cfg["temperature"] if not greedy else 0, - top_p=self.cfg["top_p"], - top_k=top_k if not greedy else 1, - max_tokens=self.cfg["max_new_tokens"], - stop_token_ids=self.cfg["stop_token_ids"], - stop=stop_strings, - include_stop_str_in_output=True, # returning stop strings like hf - ) - - # Generate outputs - assert self.llm is not None, ( - "Attempting to generate with either an uninitialized vLLM or non-model-owner" - ) - outputs = self.llm.generate(data["prompts"], sampling_params) - texts = [output.outputs[0].text for output in outputs] - - # Convert to BatchedDataDict - return_data: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict( - {"texts": texts} - ) - return return_data - - async def generate_text_async( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Generate text responses asynchronously, yielding results as they are ready. 
- - Args: - data: BatchedDataDict containing prompts with text strings - greedy: Whether to use greedy decoding instead of sampling - - Yields: - Tuple of (original_index, BatchedDataDict containing single text response) - """ - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "generate_text_async can only be used when async_engine is enabled in vLLM config." - ) - - # Handle empty input case - if len(data["prompts"]) == 0: - return - - prompts = data["prompts"] - batch_size = len(prompts) - - # Extract stop_strings if provided, else use default from config - batch_stop_strings: list[list[str] | None] = data.get( - "stop_strings", [self.cfg.get("stop_strings")] * batch_size - ) - - # Create tasks for each prompt - async def process_single_prompt(prompt_idx): - """Process a single prompt and return the result.""" - prompt = prompts[prompt_idx] - - # Get stop strings for this specific prompt - per_prompt_stop_strings = None - if batch_stop_strings and prompt_idx < len(batch_stop_strings): - per_prompt_stop_strings = batch_stop_strings[prompt_idx] - - # Merge stop strings - final_stop_strings = self._merge_stop_strings( - [per_prompt_stop_strings] if per_prompt_stop_strings else None - ) - - # Create sampling parameters - top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 - sampling_params = self.SamplingParams( - temperature=self.cfg["temperature"] if not greedy else 0, - top_p=self.cfg["top_p"], - top_k=top_k if not greedy else 1, - max_tokens=self.cfg["max_new_tokens"], - stop_token_ids=self.cfg["stop_token_ids"], - stop=final_stop_strings, - include_stop_str_in_output=True, # returning stop strings like hf - ) - - request_id = str(uuid.uuid4()) - - # Generate using vLLM async engine - vllm_request_generator = self.llm.generate( - prompt=prompt, - sampling_params=sampling_params, - request_id=request_id, - ) - - # Get the final result from the generator - final_request_output = None - async for req_output in 
vllm_request_generator: - final_request_output = req_output - - if final_request_output is None: - raise RuntimeError(f"No output received for request {request_id}") - - # Extract the generated text - generated_text = final_request_output.outputs[0].text - - # Create result in BatchedDataDict format - result_batch = BatchedDataDict[GenerationOutputSpec]( - {"texts": [generated_text]} - ) - - return (prompt_idx, result_batch) - - # Create tasks for all prompts and yield results as they complete - prompt_tasks = [ - asyncio.create_task(process_single_prompt(i)) for i in range(batch_size) - ] - - # Yield results as they become available - for completed_task in asyncio.as_completed(prompt_tasks): - try: - result = await completed_task - yield result - except Exception as e: - # Cancel remaining tasks - for task in prompt_tasks: - if not task.done(): - task.cancel() - await asyncio.gather(*prompt_tasks, return_exceptions=True) - raise e - - def shutdown(self) -> bool: - """Clean up vLLM resources.""" - try: - if self.llm is not None: - is_async_engine = self.cfg.get("vllm_cfg", {}).get( - "async_engine", False - ) - - if is_async_engine: - try: - self.llm.shutdown() - except Exception as e_stop: - print(f"Error calling shutdown_background_loop: {e_stop}") - # Explicitly delete the engine. This may trigger its __del__ method. - del self.llm - - self.llm = None - self.tokenizer = None - - # Force garbage collection - gc.collect() - torch.cuda.empty_cache() - - return True - except Exception as e: - print(f"Error during vLLM shutdown: {e}") - return False - - def report_device_id(self) -> list[str]: - """Report device ID from the vLLM worker.""" - assert self.llm is not None, ( - "Attempting to report device id with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "report_device_id cannot be used with async_engine=True. Use report_device_id_async instead." 
- ) - - list_of_worker_results = self.llm.collective_rpc( - "report_device_id", args=tuple() - ) - return cast(list[str], list_of_worker_results) - - async def report_device_id_async(self) -> list[str]: - """Async version of report_device_id.""" - assert self.llm is not None, ( - "Attempting to report device id with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "report_device_id_async can only be used with async_engine=True. Use report_device_id instead." - ) - - result_or_coro = await self.llm.collective_rpc("report_device_id", args=tuple()) - - if asyncio.iscoroutine(result_or_coro): - list_of_worker_results = await result_or_coro - else: - list_of_worker_results = result_or_coro - - return cast(list[str], list_of_worker_results) - - def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: - """Prepare the info for refit.""" - self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,)) - - async def prepare_refit_info_async(self, state_dict_info: dict[str, Any]) -> None: - """Async version of prepare_refit_info.""" - await self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,)) - - def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: - """Update weights from IPC handles by delegating to the vLLM Worker implementation. - - Args: - ipc_handles (dict): Dictionary mapping device UUIDs (str) to parameter IPC handles. - - Returns: - bool: True if weights were successfully updated, False otherwise. - """ - try: - assert self.llm is not None, ( - "Attempting to update weights with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "update_weights_from_ipc_handles cannot be used with async_engine=True. Use update_weights_from_ipc_handles_async instead." 
- ) - - if self.tensor_parallel_size == 1: - # UniProcExecutor - assert len(self.vllm_device_ids) == 1 - result_or_coro = self.llm.collective_rpc( - "update_weights_from_local_ipc_handles", - args=(ipc_handles[self.vllm_device_ids[0]],), - ) - else: - """ - DO NOT USE VLLM's collective_rpc: This code causes duplicate IPC data transfer across Ray workers, - leading to unnecessary network serialization overhead and potential performance degradation. - - result_or_coro = self.llm.collective_rpc( - "update_weights_from_global_ipc_handles", args=(ipc_handles,) - ) - """ - ray_worker_outputs = [] - # MultiProcExecutor - for worker, device_id in zip( - self.llm.llm_engine.model_executor.workers, self.vllm_device_ids - ): - ray_worker_outputs.append( - worker.execute_method.remote( - "update_weights_from_local_ipc_handles", - ipc_handles[device_id], - ) - ) - - # Gather the results - result_or_coro = ray.get(ray_worker_outputs) - - worker_result = result_or_coro[0] - - if not worker_result: - print( - f"Error: Worker failed to update weights. Result: {worker_result}" - ) - return False - return True - except Exception as e: - print(f"Exception during collective_rpc for weight update: {e}") - import traceback - - traceback.print_exc() - return False - - async def update_weights_from_ipc_handles_async( - self, ipc_handles: dict[str, Any] - ) -> bool: - """Async version of update_weights_from_ipc_handles. - - Args: - ipc_handles (dict): Dictionary mapping device UUIDs (str) to parameter IPC handles. - - Returns: - bool: True if weights were successfully updated, False otherwise. - """ - try: - assert self.llm is not None, ( - "Attempting to update weights with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "update_weights_from_ipc_handles_async can only be used with async_engine=True. Use update_weights_from_ipc_handles instead." 
- ) - - # TODO: switch to update_weights_from_local_ipc_handles for better performance once collectively report_device_id is supported in asyncLLM initialization - result_or_coro = await self.llm.collective_rpc( - "update_weights_from_global_ipc_handles", args=(ipc_handles,) - ) - - if asyncio.iscoroutine(result_or_coro): - worker_results = await result_or_coro - else: - worker_results = result_or_coro - - worker_result = worker_results[0] - - if not worker_result: - print( - f"Error: Worker failed to update weights. Result: {worker_result}" - ) - return False - return True - except Exception as e: - print(f"Exception during collective_rpc for weight update: {e}") - import traceback - - traceback.print_exc() - return False - - def update_weights_from_collective(self) -> bool: - """Update the model weights from collective communication.""" - try: - assert self.llm is not None, ( - "Attempting to update weights with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "update_weights_from_collective can only be used with async_engine=False. Use update_weights_from_collective_async instead." - ) - - result_or_coro = self.llm.collective_rpc( - "update_weights_from_collective", args=tuple() - ) - worker_result = result_or_coro[0] - - if not worker_result: - print( - f"Error: Worker failed to update weights. 
Result: {worker_result}" - ) - return False - return True - except Exception as e: - print(f"Exception during collective_rpc for weight update: {e}") - import traceback - - traceback.print_exc() - return False - - async def update_weights_from_collective_async(self) -> bool: - """Async version of update_weights_from_collective.""" - try: - assert self.llm is not None, ( - "Attempting to update weights with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "update_weights_from_collective_async can only be used with async_engine=True. Use update_weights_from_collective instead." - ) - - result_or_coro = await self.llm.collective_rpc( - "update_weights_from_collective", args=tuple() - ) - - if asyncio.iscoroutine(result_or_coro): - worker_results = await result_or_coro - else: - worker_results = result_or_coro - - worker_result = worker_results[0] - - if not worker_result: - print( - f"Error: Worker failed to update weights. Result: {worker_result}" - ) - return False - return True - except Exception as e: - print(f"Exception during collective_rpc for weight update: {e}") - import traceback - - traceback.print_exc() - return False - - def reset_prefix_cache(self): - """Reset the prefix cache of vLLM engine.""" - assert self.llm is not None, ( - "Attempting to reset prefix cache with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "reset_prefix_cache can only be used with async_engine=False. Use reset_prefix_cache_async instead." 
- ) - - self.llm.llm_engine.reset_prefix_cache() - gc.collect() - torch.cuda.empty_cache() - - async def reset_prefix_cache_async(self): - """Async version of reset_prefix_cache.""" - assert self.llm is not None, ( - "Attempting to reset prefix cache with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "reset_prefix_cache_async can only be used with async_engine=True. Use reset_prefix_cache instead." - ) - - await self.llm.reset_prefix_cache() - gc.collect() - torch.cuda.empty_cache() - - def sleep(self): - """Put the vLLM engine to sleep.""" - assert self.llm is not None, ( - "Attempting to sleep with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "sleep cannot be used with async_engine=True. Use sleep_async instead." - ) - - # Reset the prefix cache to ensure that prefix cache is not reused after weights are updated - self.llm.llm_engine.reset_prefix_cache() - self.llm.sleep(level=1) - - gc.collect() - torch.cuda.empty_cache() - - async def sleep_async(self): - """Async version of sleep.""" - assert self.llm is not None, ( - "Attempting to sleep with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "sleep_async can only be used with async_engine=True. Use sleep instead." - ) - - # Reset the prefix cache to ensure that prefix cache is not reused after weights are updated - await self.llm.reset_prefix_cache() - await self.llm.sleep(level=1) - - gc.collect() - torch.cuda.empty_cache() - - def wake_up(self, **kwargs): - """Wake up the vLLM engine.""" - assert self.llm is not None, ( - "Attempting to wake up with either an uninitialized vLLM or non-model-owner" - ) - - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "wake_up cannot be used with async_engine=True. Use wake_up_async instead." 
- ) - - tags = kwargs.get("tags") - - wake_up_args = {} - if tags is not None: - wake_up_args["tags"] = tags - - self.llm.wake_up(**wake_up_args) - - async def wake_up_async(self, **kwargs): - """Async version of wake_up.""" - assert self.llm is not None, ( - "Attempting to wake up with either an uninitialized vLLM or non-model-owner" - ) - - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "wake_up_async can only be used with async_engine=True. Use wake_up instead." - ) - - tags = kwargs.get("tags") - - wake_up_args = {} - if tags is not None: - wake_up_args["tags"] = tags - - await self.llm.wake_up(**wake_up_args) - - def start_gpu_profiling(self) -> None: - """Start GPU profiling.""" - torch.cuda.profiler.start() - - def stop_gpu_profiling(self) -> None: - """Stop GPU profiling.""" - torch.cuda.profiler.stop() - - -class VllmGeneration(GenerationInterface): - def __init__( - self, - cluster: RayVirtualCluster, - config: VllmConfig, - name_prefix: str = "vllm_policy", - workers_per_node: Optional[Union[int, list[int]]] = None, - ): - """Initialize a vLLM policy with distributed workers.""" - # Store config - self.cfg = config - if self.cfg["vllm_cfg"]["pipeline_parallel_size"] > 1: - assert self.cfg["vllm_cfg"]["async_engine"], ( - "When pipeline_parallel_size > 1, async_engine must be set to True in the vLLM configuration. " - "You can enable it by adding `policy.generation.vllm_cfg.async_engine=true` to your command." - ) - - # Ensure all required VllmConfig fields are present - missing_keys = [ - key for key in VllmConfig.__required_keys__ if key not in self.cfg - ] - assert not missing_keys, ( - f"VLLM Configuration Error: Missing required keys in VllmConfig.\n" - f"Missing keys: {', '.join(missing_keys)}\n" - f"Provided keys: {', '.join(self.cfg.keys())}\n" - f"Please update your configuration to include all required VLLM parameters." 
- ) - - self.sharding_annotations = NamedSharding( - layout=np.arange(cluster.world_size()).reshape( - -1, # DP - config["vllm_cfg"]["pipeline_parallel_size"], # PP - config["vllm_cfg"]["tensor_parallel_size"], # TP - ), - names=["data_parallel", "pipeline_parallel", "tensor_parallel"], - ) - self.model_parallel_size = self.sharding_annotations.get_axis_size( - "tensor_parallel" - ) * self.sharding_annotations.get_axis_size("pipeline_parallel") - - # Determine if we need cross-node model parallelism - needs_cross_node_parallelism = ( - self.model_parallel_size > cluster.num_gpus_per_node - ) - - # Initialize placement groups with the appropriate mode - cluster._init_placement_groups(use_unified_pg=needs_cross_node_parallelism) - - # Create worker builder for VllmGenerationWorker - worker_builder = RayWorkerBuilder( - "nemo_rl.models.generation.vllm.VllmGenerationWorker", config - ) - - # It's necessary to set env_vars here to ensure that vllm non-leader workers also have these env_vars - # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. - # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. 
- env_vars = {} - if not self.cfg["colocated"]["enabled"]: - os.environ["NCCL_CUMEM_ENABLE"] = "1" - - # Check if we need parallelism-aware worker group creation - if self.model_parallel_size > 1: - # For parallelism, create node-aware worker groups - node_bundle_indices = self._get_tied_worker_bundle_indices(cluster) - - self.worker_group = RayWorkerGroup( - cluster, - worker_builder, - name_prefix=name_prefix, - bundle_indices_list=node_bundle_indices, - sharding_annotations=self.sharding_annotations, - env_vars=env_vars, - ) - else: - # Use standard worker group creation for non-parallel case - self.worker_group = RayWorkerGroup( - cluster, - worker_builder, - name_prefix=name_prefix, - workers_per_node=workers_per_node, - sharding_annotations=self.sharding_annotations, - env_vars=env_vars, - ) - - # Call some collective rpc functions in VllmGenerationWorker when initializing the vLLM engine - # This is necessary for async engine to work - self._post_init() - - # Number of data parallel groups is the number of tied worker groups - self.dp_size = self.worker_group.dp_size - - # Used to track the round-robin selection of worker groups for generate_async - self.current_generate_dp_shard_idx = 0 - - # Save the device uuids for the workers - self.device_uuids = self._report_device_id() - - def _get_tied_worker_bundle_indices( - self, cluster: RayVirtualCluster - ) -> list[tuple[int, list[int]]]: - """Calculate bundle indices for tensor and pipeline parallel workers. - - Handles both unified placement groups (for cross-node model parallelism) and - per-node placement groups (for node-local model parallelism). 
- """ - # Get the placement groups from the cluster - placement_groups = cluster.get_placement_groups() - - if not placement_groups: - raise ValueError("No placement groups available in the cluster") - - # Total parallel sizes - tp_size = self.sharding_annotations.get_axis_size("tensor_parallel") - pp_size = self.sharding_annotations.get_axis_size("pipeline_parallel") - model_parallel_size = tp_size * pp_size - - if len(placement_groups) == 1: - # Single unified placement group used when we need multiple nodes for model parallelism - unified_pg = placement_groups[0] - - def get_node_bundles( - pg: PlacementGroup, - ) -> dict[str, list[int]]: - # Retrieve mapping from node ID to bundle indices from a placement group. - try: - pg_table = ray.util.placement_group_table(pg) - bundle_to_node = pg_table["bundles_to_node_id"] - except Exception as e: - raise RuntimeError( - "Failed to retrieve bundle/node mapping from placement group" - ) from e - - node_bundles: dict[str, list[int]] = defaultdict(list) - for bundle_idx, node_id in bundle_to_node.items(): - node_bundles[node_id].append(bundle_idx) - for bundles in node_bundles.values(): - bundles.sort() - return dict(node_bundles) - - def allocate_worker_groups( - pg: PlacementGroup, tp_size: int, pp_size: int - ) -> list[tuple[int, list[int]]]: - # Allocate worker groups for TP and PP training, assuming all nodes have identical bundle counts. 
- - # Retrieve both bundle mapping and per-node bundles - pg_table = ray.util.placement_group_table(pg) - bundle_to_node = pg_table["bundles_to_node_id"] - node_bundles = get_node_bundles(pg) - - if not node_bundles: - raise ValueError("Placement group contains no bundles") - - # Ensure all nodes have the same number of bundles - counts = [len(b) for b in node_bundles.values()] - assert len(set(counts)) == 1, ( - "All nodes must have identical bundle counts" - ) - - total = sum(counts) - model_parallel_size = tp_size * pp_size - num_groups = total // model_parallel_size - if num_groups == 0: - raise ValueError( - "Unable to allocate any worker groups with the available resources." - ) - - # Create reproducible node indices - sorted_nodes = sorted(node_bundles) - node_idx = {nid: idx for idx, nid in enumerate(sorted_nodes)} - - # Flatten bundles in node order - flat: list[int] = [] - for nid in sorted_nodes: - flat.extend(node_bundles[nid]) - - # Slice into groups and assign logical index - groups: list[tuple[int, list[int]]] = [] - for i in range(num_groups): - slice_ = flat[ - i * model_parallel_size : (i + 1) * model_parallel_size - ] - first_node = bundle_to_node[slice_[0]] - groups.append((node_idx[first_node], slice_)) - - return groups - - tied_groups = allocate_worker_groups(unified_pg, tp_size, pp_size) - else: - tied_groups = [] - # For per-node PGs, each PG represents a node - for pg_idx, pg in enumerate(placement_groups): - if pg.bundle_count == 0: - continue - - # Check if this PG has enough bundles for at least one group - num_groups_in_pg = pg.bundle_count // model_parallel_size - - # Create groups within this PG - for group_idx in range(num_groups_in_pg): - start_idx = group_idx * model_parallel_size - end_idx = start_idx + model_parallel_size - bundle_indices = list(range(start_idx, end_idx)) - # Use pg_idx as the node identifier - tied_groups.append((pg_idx, bundle_indices)) - - if not tied_groups: - raise ValueError( - "Unable to allocate any 
worker groups with the available resources." - ) - - return tied_groups - - def _report_device_id(self) -> list[list[str]]: - """Report the device ID of vllm workers.""" - # Choose the appropriate method based on async_engine setting - method_name = ( - "report_device_id_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "report_device_id" - ) - # Use run_all_workers_single_data for methods that don't need data - futures = self.worker_group.run_all_workers_single_data( - method_name, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"] - ) - # Wait for all futures to complete - results = ray.get(futures) - return results - - def _post_init(self): - # Choose the appropriate method based on async_engine setting - method_name = ( - "post_init_async" if self.cfg["vllm_cfg"]["async_engine"] else "post_init" - ) - # Use run_all_workers_single_data for methods that don't need data - futures = self.worker_group.run_all_workers_single_data( - method_name, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"] - ) - # Wait for all futures to complete - results = ray.get(futures) - return results - - def init_collective( - self, ip: str, port: int, world_size: int - ) -> list[ray.ObjectRef]: - """Initialize the collective communication.""" - if not self.worker_group or not self.worker_group.workers: - raise RuntimeError("Worker group is not initialized") - - # Choose the appropriate method based on async_engine setting - method_name = ( - "init_collective_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "init_collective" - ) - - # Prepare rank - total_workers = len(self.worker_group.workers) - if self.dp_size == 0: - raise RuntimeError( - "Data parallel size is zero, cannot initialize collective." 
- ) - workers_per_group = total_workers // self.dp_size - rank_prefix_list = list(range(0, total_workers, workers_per_group)) - - # Send world_size and rank for init collective to all workers - futures = self.worker_group.run_all_workers_multiple_data( - method_name, - rank_prefix=rank_prefix_list, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - common_kwargs={"ip": ip, "port": port, "world_size": world_size}, - ) - - # this function should co-work with lm_policy, so we should wait for all futures to complete outside - return futures - - def generate( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> BatchedDataDict[GenerationOutputSpec]: - """Generate a batch of data using vLLM.""" - assert isinstance(data, BatchedDataDict), ( - f"data must be a BatchedDataDict, got type: {type(data)}" - ) - assert "input_ids" in data and "input_lengths" in data, ( - "input_ids and input_lengths are required in data for vLLM generation" - ) - - # Shard the data across the tied worker groups - dp_size = self.sharding_annotations.get_axis_size("data_parallel") - sharded_data: list[SlicedDataDict] = data.shard_by_batch_size( - dp_size, allow_uneven_shards=True - ) - future_bundle = self.worker_group.run_all_workers_sharded_data( - "generate", - data=sharded_data, - in_sharded_axes=["data_parallel"], - replicate_on_axes=None, # just run on tp rank 0 - output_is_replicated=None, - common_kwargs={"greedy": greedy}, - ) - - # Get results from the workers, respecting tied worker groups (only one result per tied worker group) - results = self.worker_group.get_all_worker_results(future_bundle) - - # Combine results from all tied worker groups - combined: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches( - results, pad_value_dict={"output_ids": self.cfg["pad_token_id"]} - ) - - # Verify the output has all required fields - required_keys = [ - "output_ids", - "generation_lengths", - "unpadded_sequence_lengths", - "logprobs", 
- ] - missing_keys = [key for key in required_keys if key not in combined] - if missing_keys: - raise ValueError( - f"Missing required keys for GenerationOutputSpec: {missing_keys}" - ) - - return combined - - def generate_text( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> BatchedDataDict[GenerationOutputSpec]: - """Generate text responses using vLLM.""" - assert isinstance(data, BatchedDataDict), ( - f"data must be a BatchedDataDict, got type: {type(data)}" - ) - - # Check if async engine is enabled - if self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - "generate_text cannot be used with async_engine=True. Use generate_text_async instead." - ) - - # Shard the data across the tied worker groups - dp_size = self.sharding_annotations.get_axis_size("data_parallel") - sharded_data: list[SlicedDataDict] = data.shard_by_batch_size( - dp_size, allow_uneven_shards=True - ) - future_bundle = self.worker_group.run_all_workers_sharded_data( - "generate_text", - data=sharded_data, - in_sharded_axes=["data_parallel"], - replicate_on_axes=None, # just run on tp rank 0 - output_is_replicated=None, - common_kwargs={"greedy": greedy}, - ) - - # Get results from the workers, respecting tied worker groups (only one result per tied worker group) - results = self.worker_group.get_all_worker_results(future_bundle) - - # Combine results from all tied worker groups - combined: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches( - results, pad_value_dict={"output_ids": self.cfg["pad_token_id"]} - ) - - # Verify the output has all required fields - required_keys = ["texts"] - missing_keys = [key for key in required_keys if key not in combined] - if missing_keys: - raise ValueError( - f"Missing required keys for GenerationOutputSpec: {missing_keys}" - ) - - return combined - - async def _async_generate_base( - self, - data: BatchedDataDict[GenerationDatumSpec], - method_name: str, - data_validation_fn, - greedy: bool = False, - 
) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Base async generation method that handles common worker management logic. - - Args: - data: Input data for generation - method_name: Name of the worker method to call ('generate_async' or 'generate_text_async') - data_validation_fn: Function to validate input data - greedy: Whether to use greedy decoding - - Yields: - Tuple of (original_index, BatchedDataDict containing generation result) - """ - if not self.cfg["vllm_cfg"]["async_engine"]: - raise RuntimeError( - f"{method_name} can only be used when async_engine is enabled in vLLM config." - ) - - assert isinstance(data, BatchedDataDict), ( - f"data must be a BatchedDataDict, got type: {type(data)}" - ) - - # Validate input data and handle empty case - if not data_validation_fn(data): - return - - # Determine the leader worker for the current data parallel shard - leader_worker_idx = self.worker_group.get_dp_leader_worker_idx( - self.current_generate_dp_shard_idx - ) - - # Run the async method on the selected leader worker - worker_gen_proxy = self.worker_group.run_single_worker_single_data( - method_name=method_name, - worker_idx=leader_worker_idx, - data=data, - greedy=greedy, - ) - - # Increment the round-robin worker group index - self.current_generate_dp_shard_idx += 1 - self.current_generate_dp_shard_idx %= self.worker_group.dp_size - - # Create a queue to collect sample results from the worker as they complete - result_queue = asyncio.Queue() - finished = False - - async def consume_worker_generator(worker_idx, worker_gen): - """Consume a single worker generator and put sample results in the queue.""" - nonlocal finished - worker_name = f"Worker-{worker_idx}" - try: - async for sample_result_ref in worker_gen: - sample_result = await sample_result_ref - await result_queue.put(("sample", sample_result)) - except Exception as e: - # Log the error before putting it in the queue for better debugging - import traceback - - 
print(f"Exception in worker {worker_name}") - traceback.print_exc() - await result_queue.put(("error", e)) - finally: - finished = True - await result_queue.put(("worker_done", None)) - - # Start the task to consume the worker generator - worker_task = asyncio.create_task( - consume_worker_generator(leader_worker_idx, worker_gen_proxy) - ) - - # Yield sample results as they become available from the worker - timeout_seconds = float( - os.environ.get("NRL_VLLM_ASYNC_TIMEOUT_SECONDS", "600") - ) # Default 10 minutes - - while not finished: - try: - msg_type, item = await asyncio.wait_for( - result_queue.get(), timeout=timeout_seconds - ) - except asyncio.TimeoutError: - print( - f"Timeout waiting for results after {timeout_seconds}s. Worker has not finished." - ) - print( - f"For longer sequences, increase the timeout by setting: export NRL_VLLM_ASYNC_TIMEOUT_SECONDS={int(timeout_seconds * 2)}" - ) - # Cancel the task - if not worker_task.done(): - worker_task.cancel() - await asyncio.gather(worker_task, return_exceptions=True) - raise RuntimeError( - f"Timeout waiting for worker results after {timeout_seconds}s. 
" - f"For longer sequences, increase timeout by setting: export NRL_VLLM_ASYNC_TIMEOUT_SECONDS={int(timeout_seconds * 2)}" - ) - - if msg_type == "sample": - # Yield individual sample result immediately - yield item - elif msg_type == "error": - # Cancel the task and propagate error - if not worker_task.done(): - worker_task.cancel() - await asyncio.gather(worker_task, return_exceptions=True) - raise item - elif msg_type == "worker_done": - # Worker finished, just continue the loop - pass - else: - raise RuntimeError(f"Unexpected message type: {msg_type}") - - # Verify the task is actually done - assert worker_task.done(), ( - f"Worker task {leader_worker_idx} should be done but isn't" - ) - - async def generate_text_async( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Generate text responses asynchronously, yielding results as they are ready. - - Args: - data: BatchedDataDict containing prompts with text strings - greedy: Whether to use greedy decoding instead of sampling - - Yields: - Tuple of (original_index, BatchedDataDict containing single text response) - """ - - def validate_text_data(data): - if len(data["prompts"]) == 0: - return False # Return False for empty case to trigger early return - return True - - async for result in self._async_generate_base( - data, "generate_text_async", validate_text_data, greedy - ): - yield result - - async def generate_async( - self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False - ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: - """Generate responses asynchronously, yielding individual samples as they complete. - - This method provides per-sample streaming across all workers, yielding each - sample result as soon as it's ready, regardless of which worker processed it. 
- """ - - def validate_generate_data(data): - if "input_ids" not in data or "input_lengths" not in data: - raise AssertionError( - "input_ids and input_lengths are required in data for vLLM generation" - ) - if len(data["input_ids"]) == 0: - return False # Return False for empty case to trigger early return - return True - - async for result in self._async_generate_base( - data, "generate_async", validate_generate_data, greedy - ): - yield result - - def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool: - """Wake workers up for colocated inference.""" - # non-colocated no need to wake up - if not self.cfg["colocated"]["enabled"]: - return True - - try: - # Choose the appropriate method based on async_engine setting - method_name = ( - "wake_up_async" if self.cfg["vllm_cfg"]["async_engine"] else "wake_up" - ) - # Use run_all_workers_single_data for methods that don't need data - futures = self.worker_group.run_all_workers_single_data( - method_name, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - **kwargs, - ) - # Wait for all futures to complete - results = ray.get(futures) - return all(result for result in results if result is not None) - except Exception as e: - print(f"Error during policy preparation: {e}") - return False - - def finish_generation(self, *args: Any, **kwargs: Any) -> bool: - """Sleep workers and reset prefix cache.""" - try: - # Choose the appropriate method based on setting - # non-colocated only needs reset prefix cache, no need to sleep. 
- if self.cfg["colocated"]["enabled"]: - method_name = ( - "sleep_async" if self.cfg["vllm_cfg"]["async_engine"] else "sleep" - ) - else: - method_name = ( - "reset_prefix_cache_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "reset_prefix_cache" - ) - # Use run_all_workers_single_data for methods that don't need data - futures = self.worker_group.run_all_workers_single_data( - method_name, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - ) - # Wait for all futures to complete - results = ray.get(futures) - return all(result for result in results if result is not None) - except Exception as e: - print(f"Error during policy preparation: {e}") - return False - - def shutdown(self) -> bool: - """Shut down all vLLM workers and clean up resources.""" - try: - # Use the worker group's shutdown method with the worker's cleanup method - return self.worker_group.shutdown(cleanup_method="shutdown") - except Exception as e: - print(f"Error during policy shutdown: {e}") - return False - - def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: - """Prepare the info for refit.""" - # Choose the appropriate method based on async_engine setting - method_name = ( - "prepare_refit_info_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "prepare_refit_info" - ) - - # Use run_all_workers_single_data to send data to all workers - futures = self.worker_group.run_all_workers_single_data( - method_name, - state_dict_info=state_dict_info, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - ) - - # Wait for all futures to complete - ray.get(futures) - - def update_weights_from_ipc_handles(self, ipc_handles: dict[str, Any]) -> bool: - """Update weights of the policy using IPC handles, considering tensor parallelism. - - For tp > 1, only the leader in each tensor parallel tied worker group will update weights. - - Args: - ipc_handles (dict): Dictionary mapping device UUIDs (str) to parameter IPC handles. 
- - Returns: - bool: True if weights were successfully updated, False otherwise. - """ - if not self.worker_group or not self.worker_group.workers: - return False - - # Choose the appropriate method based on async_engine setting - method_name = ( - "update_weights_from_ipc_handles_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "update_weights_from_ipc_handles" - ) - - # Only send the ipc handles required by the current worker - ipc_handles_list = [] - for worker_device_uuids in self.device_uuids: - worker_ipc_handles = { - device_uuid: ipc_handles[device_uuid] - for device_uuid in worker_device_uuids - } - ipc_handles_list.append(worker_ipc_handles) - - try: - # Directly pass ipc_handles to the method - futures = self.worker_group.run_all_workers_multiple_data( - method_name, - ipc_handles=ipc_handles_list, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - ) - # Wait for all futures to complete - results = ray.get(futures) - return all(result for result in results if result is not None) - except Exception as e: - print(f"Error during update weights: {e}") - return False - - def update_weights_from_collective(self) -> list[ray.ObjectRef]: - """Update weights of the policy using collective communication.""" - if not self.worker_group or not self.worker_group.workers: - raise RuntimeError("Worker group is not initialized") - - # Choose the appropriate method based on async_engine setting - method_name = ( - "update_weights_from_collective_async" - if self.cfg["vllm_cfg"]["async_engine"] - else "update_weights_from_collective" - ) - - # Use run_all_workers_single_data for methods that don't need data - futures = self.worker_group.run_all_workers_single_data( - method_name, - run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], - ) - - # this function should co-work with lm_policy, so we should wait for all futures to complete outside - return futures - - def start_gpu_profiling(self) -> None: - """Start GPU profiling.""" - futures = 
self.worker_group.run_all_workers_single_data("start_gpu_profiling") - ray.get(futures) - - def stop_gpu_profiling(self) -> None: - """Stop GPU profiling.""" - futures = self.worker_group.run_all_workers_single_data("stop_gpu_profiling") - ray.get(futures) - - def __del__(self) -> None: - """Shuts down the worker groups when the object is deleted or is garbage collected. - - This is an extra safety net in case the user forgets to call shutdown() and the pointer to - the object is lost due to leaving a function scope. It's always recommended that the - user calls shutdown(). - """ - self.shutdown() diff --git a/nemo_rl/models/megatron/converters/__init__.py b/nemo_rl/models/generation/vllm/__init__.py similarity index 61% rename from nemo_rl/models/megatron/converters/__init__.py rename to nemo_rl/models/generation/vllm/__init__.py index 3563de4959..8d3409eb00 100644 --- a/nemo_rl/models/megatron/converters/__init__.py +++ b/nemo_rl/models/generation/vllm/__init__.py @@ -11,17 +11,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -from .common import ( - get_global_expert_num, - get_global_layer_num, - get_local_expert_num, - get_local_layer_num, -) +from nemo_rl.models.generation.vllm.config import VllmConfig +from nemo_rl.models.generation.vllm.vllm_generation import VllmGeneration +from nemo_rl.models.generation.vllm.vllm_worker import VllmGenerationWorker +from nemo_rl.models.generation.vllm.vllm_worker_async import VllmAsyncGenerationWorker __all__ = [ - "get_global_expert_num", - "get_global_layer_num", - "get_local_expert_num", - "get_local_layer_num", + "VllmConfig", + "VllmGeneration", + "VllmGenerationWorker", + "VllmAsyncGenerationWorker", ] diff --git a/nemo_rl/models/generation/vllm/config.py b/nemo_rl/models/generation/vllm/config.py new file mode 100644 index 0000000000..8ea82ec4db --- /dev/null +++ b/nemo_rl/models/generation/vllm/config.py @@ -0,0 +1,42 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Any, NotRequired, TypedDict + +from nemo_rl.models.generation.interfaces import GenerationConfig + + +class VllmSpecificArgs(TypedDict): + tensor_parallel_size: int + pipeline_parallel_size: int + expert_parallel_size: int + gpu_memory_utilization: float + max_model_len: int + # Additional arguments for vLLM inserted by nemo rl based on the context of when vllm is used + skip_tokenizer_init: bool + async_engine: bool + load_format: NotRequired[str] + precision: NotRequired[str] + enforce_eager: NotRequired[bool] + # By default, NeMo RL only has a Python handle to the vllm.LLM generation engine. The expose_http_server flag here will expose that generation engine as an HTTP server. + # Exposing vLLM as a server is useful in instances where the multi-turn rollout is performed with utilities outside of NeMo RL, but the user still wants to take advantage of the refit logic in NeMo RL that keeps the policy and generation up to date. + # Currently it will expose the /tokenize and /v1/chat/completions endpoints. Later on we may expose /v1/completions or /v1/responses. + expose_http_server: NotRequired[bool] + # These kwargs are passed to the vllm.LLM HTTP server Chat Completions endpoint config. Typically this will include things like tool parser, chat template, etc + http_server_serving_chat_kwargs: NotRequired[dict[str, Any]] + + +class VllmConfig(GenerationConfig): + vllm_cfg: VllmSpecificArgs + vllm_kwargs: NotRequired[dict[str, Any]] diff --git a/nemo_rl/models/generation/vllm/utils.py b/nemo_rl/models/generation/vllm/utils.py new file mode 100644 index 0000000000..d4a8cd88ef --- /dev/null +++ b/nemo_rl/models/generation/vllm/utils.py @@ -0,0 +1,84 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Optional + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.generation.interfaces import GenerationDatumSpec + + +def format_prompt_for_vllm_generation( + data: BatchedDataDict[GenerationDatumSpec], sample_idx: Optional[int] = None +) -> list[dict[str, Any]]: + """Format a list of prompts for vllm generation (which requires a specific format for its own `generate` method). + + See https://docs.vllm.ai/en/v0.9.1/features/multimodal_inputs.html for prompt format for multimodal inputs. + """ + # Prepare prompts for vLLM (removing padding) + prompts = [] + + input_ids = data["input_ids"] + batch_size = input_ids.shape[0] + input_lengths = data["input_lengths"] + + # if sample_idx is None, return list of all prompts for the entire batch + # else, return the prompt for the single sample specified by sample_idx + return_all = sample_idx is None + if sample_idx is None: + start_idx = 0 + end_idx = batch_size + else: + start_idx = sample_idx + end_idx = sample_idx + 1 + + def _get_regular_prompt(index: int): + valid_length = input_lengths[index].item() + valid_ids = ( + input_ids[index, :valid_length] + if valid_length > 0 + else input_ids[index, :0] + ) + token_ids = valid_ids.tolist() + return {"prompt_token_ids": token_ids} + + # Check if this is VLM generation by looking for message_log with images + # Support for videos/audio/etc. 
can be added here + # if 'message_log' in data and any('images' in msg for msg in data['message_log']): + if "vllm_content" in data: + # VLM generation using content and multi_modal_data + for i in range(start_idx, end_idx): + msg = data["vllm_content"][i] + # if msg is None, this conversation had no multimodal content, fallback to regular prompt + if msg is None: + prompts.append(_get_regular_prompt(i)) + continue + # init prompt dict + prompt_dict = {"prompt": msg} + # add additional data if present + images = data.get("vllm_images", None) + if images is None or len(images[i]) == 0: + prompts.append(_get_regular_prompt(i)) + continue + else: + prompt_dict["multi_modal_data"] = { + "image": images[i][0] if len(images[i]) == 1 else images[i] + } + prompts.append(prompt_dict) + else: + # Regular LLM generation using token_ids + for i in range(start_idx, end_idx): + # Use input_lengths to get only valid tokens (not padding) + prompts.append(_get_regular_prompt(i)) + + return prompts if return_all else prompts[0] diff --git a/nemo_rl/models/generation/vllm/vllm_backend.py b/nemo_rl/models/generation/vllm/vllm_backend.py new file mode 100644 index 0000000000..f5b8463ae0 --- /dev/null +++ b/nemo_rl/models/generation/vllm/vllm_backend.py @@ -0,0 +1,232 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import gc +import traceback +from typing import Any + +import torch +import zmq + +from nemo_rl.models.policy.utils import ( + IPCProtocol, + calculate_aligned_size, + rebuild_cuda_tensor_from_ipc, +) +from nemo_rl.utils.nsys import wrap_with_nvtx_name +from nemo_rl.utils.packed_tensor import packed_broadcast_consumer + +try: + import vllm # noqa: F401 +except ImportError: + raise ImportError( + "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " + "covers the vllm dependency. You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " + "This error can also happen if the venv creation was aborted or errored out in the middle. In that case, " + "please run at least once with the environment variable NRL_FORCE_REBUILD_VENVS=true set to force the rebuild of the environment." + ) + + +class VllmInternalWorkerExtension: + def init_collective( + self, + rank_prefix: int, + ip: str, + port: int, + world_size: int, + train_world_size: int, + ) -> None: + """Initialize the collective communication.""" + from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator + from vllm.distributed.utils import StatelessProcessGroup + + local_rank = torch.distributed.get_rank() + # Place vLLM ranks after all training ranks so all training workers can join + rank = train_world_size + rank_prefix + local_rank + + pg = StatelessProcessGroup.create( + host=ip, port=port, rank=rank, world_size=world_size + ) + self.model_update_group = PyNcclCommunicator( # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored + pg, device=self.device + ) + + def report_device_id(self) -> str: + """Retrieve the UUID of the current CUDA device.""" + from nemo_rl.utils.nvml import get_device_uuid + + return get_device_uuid(self.device.index) + + def get_zmq_address(self): + """Get the ZMQ address for the current device.""" + return 
f"ipc:///tmp/{self.report_device_id()}.sock" + + def maybe_init_zmq(self): + """Initialize the ZMQ socket if it doesn't exist.""" + if not hasattr(self, "zmq_socket"): + self.zmq_context = zmq.Context() # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored + self.zmq_socket = self.zmq_context.socket( # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored + zmq.REP + ) + self.zmq_socket.setsockopt( + zmq.SNDTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt( + zmq.RCVTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt(zmq.LINGER, 0) + self.zmq_socket.connect(self.get_zmq_address()) + + def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: + """Prepare state dict metadata for weight refitting and IPC streaming. + + Args: + state_dict_info (dict): A dictionary containing the info for refit. + e.g. {tensor_name: (shape, dtype)} + """ + self.state_dict_info = state_dict_info # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored + + @wrap_with_nvtx_name("vllm_internal_worker_extension/update_weights_via_ipc_zmq") + def update_weights_via_ipc_zmq(self) -> bool: + """Receive and update model weights via ZMQ IPC socket. + + Returns: + bool: True if weights were successfully updated. 
+ """ + buffer = None + weights = None + + try: + self.maybe_init_zmq() + while True: + # Blocking receive with timeout (this is the main operation) + payload = self.zmq_socket.recv_pyobj() + + if payload == IPCProtocol.COMPLETE: + # means the update is done + self.zmq_socket.send(IPCProtocol.ACK.value.encode()) + break + + ipc_handle, list_keys, used_bytes = payload + buffer = rebuild_cuda_tensor_from_ipc(ipc_handle, self.device.index) + + weights = [] + offset = 0 + for key in list_keys: + shape, dtype = self.state_dict_info[key] # pyrefly + if isinstance(shape, list): + shape = torch.Size(shape) + size_in_bytes = dtype.itemsize * shape.numel() + weights.append( + ( + key, + buffer[offset : offset + size_in_bytes] + .view(dtype=dtype) + .view(shape), + ) + ) + aligned_size = calculate_aligned_size(size_in_bytes) + offset += aligned_size + assert offset == used_bytes, ( + "Offset is not equal to used bytes, usually indicate inaccurate info like keys or cached dtype in state_dict_info" + ) + # Load weights into the model + from nemo_rl.models.generation import fp8 + + if fp8.is_fp8_model(self.model_runner.vllm_config): + # the fp8 load_weights additionally casts bf16 weights into fp8 + fp8.load_weights(weights, self.model_runner) + else: + self.model_runner.model.load_weights(weights=weights) + + torch.cuda.current_stream().synchronize() + + # CRITICAL: Delete views before ACK to prevent corruption. + # 'weights' contains views into IPC shared memory. Even though load_weights() + # copied the data, Python may not garbage collect these view objects immediately. + # If sender reuses the buffer before GC runs, old views would read corrupted data. + # Explicit del ensures immediate cleanup before sending ACK. 
+ del weights, buffer + weights = None + buffer = None + self.zmq_socket.send(IPCProtocol.ACK.value.encode()) + + gc.collect() + torch.cuda.empty_cache() + return True + except Exception as e: + print( + f"Error in VllmInternalWorkerExtension.update_weights_via_ipc_zmq: {e}.\n" + f"{traceback.format_exc()}" + ) + return False + + @wrap_with_nvtx_name( + "vllm_internal_worker_extension/update_weights_from_collective" + ) + def update_weights_from_collective(self) -> bool: + """Update the model weights from collective communication.""" + assert self.state_dict_info is not None, ( + "state_dict_info is not prepared. " + "Please call prepare_refit_info when initializing the worker." + ) + + def _load_model_weights(weights, model_runner): + """Load model weights. + + Args: + weights: List[(name, tensor)] + model_runner: vLLM ModelRunner + + Returns: + None + """ + from nemo_rl.models.generation import fp8 + + if fp8.is_fp8_model(model_runner.vllm_config): + # the fp8 load_weights additionally casts bf16 weights into fp8 + fp8.load_weights(weights, model_runner) + else: + model_runner.model.load_weights(weights=weights) + + load_model_weight_func = lambda x: _load_model_weights(x, self.model_runner) + + try: + packed_broadcast_consumer( + iterator=iter(self.state_dict_info.items()), + group=self.model_update_group, + src=0, + post_unpack_func=load_model_weight_func, + ) + except Exception as e: + print( + f"Error in VllmInternalWorkerExtension.update_weights_from_collective: {e}" + ) + return False + + return True + + def cleanup(self) -> None: + """Shutdown and cleanup resources.""" + # Close ZMQ socket and context if they exist + if hasattr(self, "zmq_socket"): + self.zmq_socket.close() + self.zmq_context.term() + + def start_gpu_profiling(self) -> None: + """Start GPU profiling.""" + torch.cuda.profiler.start() + + def stop_gpu_profiling(self) -> None: + """Stop GPU profiling.""" + torch.cuda.profiler.stop() diff --git 
a/nemo_rl/models/generation/vllm/vllm_generation.py b/nemo_rl/models/generation/vllm/vllm_generation.py new file mode 100644 index 0000000000..5dcc7eaf2e --- /dev/null +++ b/nemo_rl/models/generation/vllm/vllm_generation.py @@ -0,0 +1,851 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import os +from collections import defaultdict +from typing import ( + Any, + AsyncGenerator, + Optional, + Union, +) + +import numpy as np +import ray +from ray.util.placement_group import PlacementGroup + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict, SlicedDataDict +from nemo_rl.distributed.named_sharding import NamedSharding +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.distributed.worker_groups import RayWorkerBuilder, RayWorkerGroup +from nemo_rl.models.generation.interfaces import ( + GenerationDatumSpec, + GenerationInterface, + GenerationOutputSpec, +) +from nemo_rl.models.generation.vllm.config import VllmConfig + +# Global thresholds for top_k and top_p validation. +# While top-k/p are not supported, these values allow for token filtering while the logprobs should be compatible. +# See https://github.com/NVIDIA-NeMo/RL/issues/69 and https://github.com/NVIDIA-NeMo/RL/issues/237 for more details. 
+TOP_K_THRESHOLD = 8000 # Allow top_k >= 8000 (effectively no filtering) +TOP_P_THRESHOLD = 0.99 # Allow top_p >= 0.99 (close to 1.0) + + +class VllmGeneration(GenerationInterface): + def __init__( + self, + cluster: RayVirtualCluster, + config: VllmConfig, + name_prefix: str = "vllm_policy", + workers_per_node: Optional[Union[int, list[int]]] = None, + ): + """Initialize a vLLM policy with distributed workers.""" + # Store config + self.cfg = config + self.tp_size = self.cfg["vllm_cfg"]["tensor_parallel_size"] + self.pp_size = self.cfg["vllm_cfg"]["pipeline_parallel_size"] + self.ep_size = self.cfg["vllm_cfg"]["expert_parallel_size"] + self.model_parallel_size = self.tp_size * self.pp_size + + assert cluster.world_size() % self.model_parallel_size == 0, ( + "World size must be a multiple of model parallel size. " + f"Got world size {cluster.world_size()} and model parallel size (TP * PP) {self.model_parallel_size}." + ) + self.dp_size = cluster.world_size() // self.model_parallel_size + self.vllm_dp_size = self.ep_size // self.tp_size + + if self.pp_size > 1: + assert self.cfg["vllm_cfg"]["async_engine"], ( + "When pipeline_parallel_size > 1, async_engine must be set to True in the vLLM configuration. " + "You can enable it by adding `policy.generation.vllm_cfg.async_engine=true` to your command." + ) + + if self.ep_size > 1: + assert self.ep_size % self.tp_size == 0, ( + "When EP > 1, EP must be a multiple of TP since vLLM's EP = DP * TP. " + "Please update your configuration to set expert_parallel_size to a multiple of tensor_parallel_size." + ) + if self.ep_size != self.tp_size: + # vLLM's EP = DP * TP, so here we need to use DP inside vLLM. + assert not self.cfg["vllm_cfg"]["async_engine"], ( + "vLLM async_engine has some issues when using DP inside vLLM. " + "Please update your configuration to set `policy.generation.vllm_cfg.async_engine=false`. " + "See https://github.com/NVIDIA-NeMo/RL/issues/1101 for more details." 
+ ) + + # Validate sampling parameters early to avoid resource allocation with unsupported configs. + # The vLLM sampler patch only supports temperature scaling and does not handle top_p/top_k correctly. + # However, we allow values above certain thresholds for token filtering purposes. + top_k = self.cfg["top_k"] + if top_k is not None and top_k != -1 and top_k < TOP_K_THRESHOLD: + raise ValueError( + ( + f"top_k sampling with values < {TOP_K_THRESHOLD} is not supported because the vLLM V1 engine " + "does not return logprobs after top_k filtering. Values >= {TOP_K_THRESHOLD} are allowed " + "for token filtering purposes. If you understand the implications and still want to use " + f"a lower top_k value, please manually comment out this check. Got top_k={top_k}. " + "See https://github.com/NVIDIA-NeMo/RL/issues/69 for more details." + ) + ) + + top_p: float = self.cfg.get("top_p", 1.0) + if top_p < TOP_P_THRESHOLD: + raise ValueError( + ( + f"top_p sampling with values < {TOP_P_THRESHOLD} is not supported because the vLLM V1 engine " + "does not return logprobs after top_p filtering. Values >= {TOP_P_THRESHOLD} are allowed " + "for token filtering purposes. If you understand the implications and still want to use " + f"a lower top_p value, please manually comment out this check. Got top_p={top_p}. " + "See https://github.com/NVIDIA-NeMo/RL/issues/69 for more details." + ) + ) + + # Ensure all required VllmConfig fields are present + missing_keys = [ + key for key in VllmConfig.__required_keys__ if key not in self.cfg + ] + # Also check for model_name which is required by VllmGenerationWorker but marked as NotRequired in GenerationConfig because it's not expected to be set in the job yaml. 
+ if "model_name" not in self.cfg: + missing_keys.append("model_name") + + assert not missing_keys, ( + f"VLLM Configuration Error: Missing required keys in VllmConfig.\n" + f"Missing keys: {', '.join(missing_keys)}\n" + f"Provided keys: {', '.join(self.cfg.keys())}\n" + f"Please update your configuration to include all required VLLM parameters." + ) + + self.sharding_annotations = NamedSharding( + layout=np.arange(cluster.world_size()).reshape( + self.dp_size, self.pp_size, self.tp_size + ), + names=["data_parallel", "pipeline_parallel", "tensor_parallel"], + ) + + # non-colocated needs to use PACK strategy to avoid uneven node_bundles + # e.g. assuming we use 3 nodes with 8GPUs, 2 nodes for train and 1 node for inference. + # if we use SPREAD, then the node bundles will be something like 0: [0,3,6] 1: [1,4,7] 2: [2,5], which is not correct. + strategy = None if self.cfg["colocated"]["enabled"] else "PACK" + + # Determine if we need cross-node model parallelism + needs_cross_node_parallelism = ( + self.model_parallel_size > cluster.num_gpus_per_node + ) + + # Initialize placement groups with the appropriate mode + cluster._init_placement_groups( + strategy=strategy, + use_unified_pg=needs_cross_node_parallelism, + ) + + # Create worker builder for VllmGenerationWorker + if self.cfg["vllm_cfg"]["async_engine"]: + worker_cls = "nemo_rl.models.generation.vllm.vllm_worker_async.VllmAsyncGenerationWorker" + else: + worker_cls = ( + "nemo_rl.models.generation.vllm.vllm_worker.VllmGenerationWorker" + ) + worker_builder = RayWorkerBuilder(worker_cls, config) + + # It's necessary to set env_vars here to ensure that vllm non-leader workers also have these env_vars + env_vars = {} + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. + # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. 
+ if not self.cfg["colocated"]["enabled"]: + env_vars["NCCL_CUMEM_ENABLE"] = "1" + + if needs_cross_node_parallelism: + # When using cross-node model parallelism with non-colocated inference, + # we are disabling NCCL_NVLS_ENABLE to avoid the NCCL error. + # See https://github.com/NVIDIA-NeMo/RL/issues/1352 for more details. + env_vars["NCCL_NVLS_ENABLE"] = "0" + print( + "[INFO] NCCL_NVLS_ENABLE is set to 0 for non-colocated inference with cross-node model parallelism." + "See https://github.com/NVIDIA-NeMo/RL/issues/1352 for more details." + ) + # We should use vLLM DP if ep_size > tp_size since EP_SIZE = DP_SIZE * TP_SIZE in vLLM. + # See details in https://github.com/vllm-project/vllm/blob/main/examples/offline_inference/data_parallel.py + if self.ep_size > self.tp_size: + env_vars["VLLM_DP_SIZE"] = str(self.vllm_dp_size) + + # Check if we need parallelism-aware worker group creation + if self.model_parallel_size > 1: + # For parallelism, create node-aware worker groups + node_bundle_indices = self._get_tied_worker_bundle_indices(cluster) + + self.worker_group = RayWorkerGroup( + cluster, + worker_builder, + name_prefix=name_prefix, + bundle_indices_list=node_bundle_indices, + sharding_annotations=self.sharding_annotations, + env_vars=env_vars, + ) + else: + # Use standard worker group creation for non-parallel case + self.worker_group = RayWorkerGroup( + cluster, + worker_builder, + name_prefix=name_prefix, + workers_per_node=workers_per_node, + sharding_annotations=self.sharding_annotations, + env_vars=env_vars, + ) + + # Call some collective rpc functions in VllmGenerationWorker when initializing the vLLM engine + # This is necessary for async engine to work + self._post_init() + + # dp_openai_server_base_urls is only returned by Async vLLM flow when http server is active + self.dp_openai_server_base_urls = self._report_dp_openai_server_base_urls() + + # Number of data parallel groups is the number of tied worker groups + assert self.dp_size == 
self.worker_group.dp_size, ( + f"Data parallel size mismatch. Expected {self.dp_size}, got {self.worker_group.dp_size}" + ) + + # Used to track the round-robin selection of worker groups for generate_async + self.current_generate_dp_shard_idx = 0 + + # Save the device uuids for the workers + self.device_uuids = self._report_device_id() + + def _get_tied_worker_bundle_indices( + self, cluster: RayVirtualCluster + ) -> list[tuple[int, list[int]]]: + """Calculate bundle indices for tensor and pipeline parallel workers. + + Handles both unified placement groups (for cross-node model parallelism) and + per-node placement groups (for node-local model parallelism). + """ + # Get the placement groups from the cluster + placement_groups = cluster.get_placement_groups() + + if not placement_groups: + raise ValueError("No placement groups available in the cluster") + + # Total parallel sizes + tp_size = self.sharding_annotations.get_axis_size("tensor_parallel") + pp_size = self.sharding_annotations.get_axis_size("pipeline_parallel") + model_parallel_size = tp_size * pp_size + + if len(placement_groups) == 1: + # Single unified placement group used when we need multiple nodes for model parallelism + unified_pg = placement_groups[0] + + def get_node_bundles( + pg: PlacementGroup, + ) -> dict[str, list[int]]: + # Retrieve mapping from node ID to bundle indices from a placement group. 
+ try: + pg_table = ray.util.placement_group_table(pg) + bundle_to_node = pg_table["bundles_to_node_id"] + except Exception as e: + raise RuntimeError( + "Failed to retrieve bundle/node mapping from placement group" + ) from e + + node_bundles: dict[str, list[int]] = defaultdict(list) + for bundle_idx, node_id in bundle_to_node.items(): + node_bundles[node_id].append(bundle_idx) + for bundles in node_bundles.values(): + bundles.sort() + return dict(node_bundles) + + def allocate_worker_groups( + pg: PlacementGroup, tp_size: int, pp_size: int + ) -> list[tuple[int, list[int]]]: + # Allocate worker groups for TP and PP training, assuming all nodes have identical bundle counts. + + # Retrieve both bundle mapping and per-node bundles + pg_table = ray.util.placement_group_table(pg) + bundle_to_node = pg_table["bundles_to_node_id"] + node_bundles = get_node_bundles(pg) + + if not node_bundles: + raise ValueError("Placement group contains no bundles") + + # Ensure all nodes have the same number of bundles + counts = [len(b) for b in node_bundles.values()] + assert len(set(counts)) == 1, ( + "All nodes must have identical bundle counts" + ) + + total = sum(counts) + model_parallel_size = tp_size * pp_size + num_groups = total // model_parallel_size + if num_groups == 0: + raise ValueError( + "Unable to allocate any worker groups with the available resources." 
+ ) + + # Create reproducible node indices + sorted_nodes = sorted(node_bundles) + node_idx = {nid: idx for idx, nid in enumerate(sorted_nodes)} + + # Flatten bundles in node order + flat: list[int] = [] + for nid in sorted_nodes: + flat.extend(node_bundles[nid]) + + # Slice into groups and assign logical index + groups: list[tuple[int, list[int]]] = [] + for i in range(num_groups): + slice_ = flat[ + i * model_parallel_size : (i + 1) * model_parallel_size + ] + first_node = bundle_to_node[slice_[0]] + groups.append((node_idx[first_node], slice_)) + + return groups + + tied_groups = allocate_worker_groups(unified_pg, tp_size, pp_size) + else: + tied_groups = [] + # For per-node PGs, each PG represents a node + for pg_idx, pg in enumerate(placement_groups): + if pg.bundle_count == 0: + continue + + # Check if this PG has enough bundles for at least one group + num_groups_in_pg = pg.bundle_count // model_parallel_size + + # Create groups within this PG + for group_idx in range(num_groups_in_pg): + start_idx = group_idx * model_parallel_size + end_idx = start_idx + model_parallel_size + bundle_indices = list(range(start_idx, end_idx)) + # Use pg_idx as the node identifier + tied_groups.append((pg_idx, bundle_indices)) + + if not tied_groups: + raise ValueError( + "Unable to allocate any worker groups with the available resources." 
+ ) + + return tied_groups + + def _report_device_id(self) -> list[list[str]]: + """Report the device ID of vllm workers.""" + # Choose the appropriate method based on async_engine setting + method_name = ( + "report_device_id_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "report_device_id" + ) + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"] + ) + # Wait for all futures to complete + results = ray.get(futures) + return results + + def _report_dp_openai_server_base_urls(self) -> list[Optional[str]]: + """Report the data parallel OpenAI server base URLs of vLLM workers, only populated if it is async vLLM engine and the HTTP server is active.""" + if not self.cfg["vllm_cfg"]["async_engine"]: + return [None] # Not applicable since this is sync + + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + "report_dp_openai_server_base_url", + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + # Wait for all futures to complete + results = ray.get(futures) + return results + + def _post_init(self): + # Choose the appropriate method based on async_engine setting + method_name = ( + "post_init_async" if self.cfg["vllm_cfg"]["async_engine"] else "post_init" + ) + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"] + ) + # Wait for all futures to complete + results = ray.get(futures) + return results + + def init_collective( + self, ip: str, port: int, world_size: int, *, train_world_size: int + ) -> list[ray.ObjectRef]: + """Initialize the collective communication.""" + if not self.worker_group or not self.worker_group.workers: + raise RuntimeError("Worker group is not 
initialized") + + # Choose the appropriate method based on async_engine setting + method_name = ( + "init_collective_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "init_collective" + ) + + # Prepare rank + total_workers = len(self.worker_group.workers) + if self.dp_size == 0: + raise RuntimeError( + "Data parallel size is zero, cannot initialize collective." + ) + workers_per_group = total_workers // self.dp_size + rank_prefix_list = list(range(0, total_workers, workers_per_group)) + + # Send world_size and rank for init collective to all workers + futures = self.worker_group.run_all_workers_multiple_data( + method_name, + rank_prefix=rank_prefix_list, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + common_kwargs={ + "ip": ip, + "port": port, + "world_size": world_size, + "train_world_size": train_world_size, + }, + ) + + # this function should co-work with lm_policy, so we should wait for all futures to complete outside + return futures + + def generate( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> BatchedDataDict[GenerationOutputSpec]: + """Generate a batch of data using vLLM.""" + assert isinstance(data, BatchedDataDict), ( + f"data must be a BatchedDataDict, got type: {type(data)}" + ) + assert "input_ids" in data and "input_lengths" in data, ( + "input_ids and input_lengths are required in data for vLLM generation" + ) + + # Shard the data across the tied worker groups + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + sharded_data: list[SlicedDataDict] = data.shard_by_batch_size( + dp_size, allow_uneven_shards=True + ) + future_bundle = self.worker_group.run_all_workers_sharded_data( + "generate", + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=None, # just run on tp rank 0 + output_is_replicated=None, + common_kwargs={"greedy": greedy}, + ) + + # Get results from the workers, respecting tied worker groups (only one result per tied worker group) + 
results = self.worker_group.get_all_worker_results(future_bundle) + + # Combine results from all tied worker groups + combined: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches( + results, pad_value_dict={"output_ids": self.cfg["_pad_token_id"]} + ) + + # Verify the output has all required fields + required_keys = [ + "output_ids", + "generation_lengths", + "unpadded_sequence_lengths", + "logprobs", + ] + missing_keys = [key for key in required_keys if key not in combined] + if missing_keys: + raise ValueError( + f"Missing required keys for GenerationOutputSpec: {missing_keys}" + ) + + return combined + + def generate_text( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> BatchedDataDict[GenerationOutputSpec]: + """Generate text responses using vLLM.""" + assert isinstance(data, BatchedDataDict), ( + f"data must be a BatchedDataDict, got type: {type(data)}" + ) + + # Check if async engine is enabled + if self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_text cannot be used with async_engine=True. Use generate_text_async instead." 
+ ) + + # Shard the data across the tied worker groups + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + sharded_data: list[SlicedDataDict] = data.shard_by_batch_size( + dp_size, allow_uneven_shards=True + ) + future_bundle = self.worker_group.run_all_workers_sharded_data( + "generate_text", + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=None, # just run on tp rank 0 + output_is_replicated=None, + common_kwargs={"greedy": greedy}, + ) + + # Get results from the workers, respecting tied worker groups (only one result per tied worker group) + results = self.worker_group.get_all_worker_results(future_bundle) + + # Combine results from all tied worker groups + combined: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches( + results, pad_value_dict={"output_ids": self.cfg["_pad_token_id"]} + ) + + # Verify the output has all required fields + required_keys = ["texts"] + missing_keys = [key for key in required_keys if key not in combined] + if missing_keys: + raise ValueError( + f"Missing required keys for GenerationOutputSpec: {missing_keys}" + ) + + return combined + + async def _async_generate_base( + self, + data: BatchedDataDict[GenerationDatumSpec], + method_name: str, + data_validation_fn, + greedy: bool = False, + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Base async generation method that handles common worker management logic. + + Args: + data: Input data for generation + method_name: Name of the worker method to call ('generate_async' or 'generate_text_async') + data_validation_fn: Function to validate input data + greedy: Whether to use greedy decoding + + Yields: + Tuple of (original_index, BatchedDataDict containing generation result) + """ + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + f"{method_name} can only be used when async_engine is enabled in vLLM config." 
+ ) + + assert isinstance(data, BatchedDataDict), ( + f"data must be a BatchedDataDict, got type: {type(data)}" + ) + + # Validate input data and handle empty case + if not data_validation_fn(data): + return + + # Determine the leader worker for the current data parallel shard + leader_worker_idx = self.worker_group.get_dp_leader_worker_idx( + self.current_generate_dp_shard_idx + ) + + # Run the async method on the selected leader worker + worker_gen_proxy = self.worker_group.run_single_worker_single_data( + method_name=method_name, + worker_idx=leader_worker_idx, + data=data, + greedy=greedy, + ) + + # Increment the round-robin worker group index + self.current_generate_dp_shard_idx += 1 + self.current_generate_dp_shard_idx %= self.worker_group.dp_size + + # Create a queue to collect sample results from the worker as they complete + result_queue = asyncio.Queue() + finished = False + + async def consume_worker_generator(worker_idx, worker_gen): + """Consume a single worker generator and put sample results in the queue.""" + nonlocal finished + worker_name = f"Worker-{worker_idx}" + try: + async for sample_result_ref in worker_gen: + sample_result = await sample_result_ref + # sample_result is a tuple: (original_idx, BatchedDataDict) + # Tag the result with worker index for downstream attribution + original_idx, result_batch = sample_result + # Use a length-one list so BatchedDataDict.from_batches can merge without shape errors + result_batch["gen_leader_worker_idx"] = [int(worker_idx)] + sample_result = (original_idx, result_batch) + await result_queue.put(("sample", sample_result)) + except Exception as e: + # Log the error before putting it in the queue for better debugging + import traceback + + print(f"Exception in worker {worker_name}") + traceback.print_exc() + await result_queue.put(("error", e)) + finally: + finished = True + await result_queue.put(("worker_done", None)) + + # Start the task to consume the worker generator + worker_task = 
asyncio.create_task( + consume_worker_generator(leader_worker_idx, worker_gen_proxy) + ) + + # Yield sample results as they become available from the worker + timeout_seconds = float( + os.environ.get("NRL_VLLM_ASYNC_TIMEOUT_SECONDS", "600") + ) # Default 10 minutes + + while not finished: + try: + msg_type, item = await asyncio.wait_for( + result_queue.get(), timeout=timeout_seconds + ) + except asyncio.TimeoutError: + print( + f"Timeout waiting for results after {timeout_seconds}s. Worker has not finished." + ) + print( + f"For longer sequences, increase the timeout by setting: export NRL_VLLM_ASYNC_TIMEOUT_SECONDS={int(timeout_seconds * 2)}" + ) + # Cancel the task + if not worker_task.done(): + worker_task.cancel() + await asyncio.gather(worker_task, return_exceptions=True) + raise RuntimeError( + f"Timeout waiting for worker results after {timeout_seconds}s. " + f"For longer sequences, increase timeout by setting: export NRL_VLLM_ASYNC_TIMEOUT_SECONDS={int(timeout_seconds * 2)}" + ) + + if msg_type == "sample": + # Yield individual sample result immediately + yield item + elif msg_type == "error": + # Cancel the task and propagate error + if not worker_task.done(): + worker_task.cancel() + await asyncio.gather(worker_task, return_exceptions=True) + raise item + elif msg_type == "worker_done": + # Worker finished, just continue the loop + pass + else: + raise RuntimeError(f"Unexpected message type: {msg_type}") + + # Verify the task is actually done + assert worker_task.done(), ( + f"Worker task {leader_worker_idx} should be done but isn't" + ) + + async def generate_text_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate text responses asynchronously, yielding results as they are ready. 
+ + Args: + data: BatchedDataDict containing prompts with text strings + greedy: Whether to use greedy decoding instead of sampling + + Yields: + Tuple of (original_index, BatchedDataDict containing single text response) + """ + + def validate_text_data(data): + if len(data["prompts"]) == 0: + return False # Return False for empty case to trigger early return + return True + + async for result in self._async_generate_base( + data, "generate_text_async", validate_text_data, greedy + ): + yield result + + async def generate_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate responses asynchronously, yielding individual samples as they complete. + + This method provides per-sample streaming across all workers, yielding each + sample result as soon as it's ready, regardless of which worker processed it. + """ + + def validate_generate_data(data): + if "input_ids" not in data or "input_lengths" not in data: + raise AssertionError( + "input_ids and input_lengths are required in data for vLLM generation" + ) + if len(data["input_ids"]) == 0: + return False # Return False for empty case to trigger early return + return True + + async for result in self._async_generate_base( + data, "generate_async", validate_generate_data, greedy + ): + yield result + + def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool: + """Wake workers up for colocated inference.""" + # non-colocated no need to wake up + if not self.cfg["colocated"]["enabled"]: + return True + + try: + # Choose the appropriate method based on async_engine setting + method_name = ( + "wake_up_async" if self.cfg["vllm_cfg"]["async_engine"] else "wake_up" + ) + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + **kwargs, + ) + # 
Wait for all futures to complete + results = ray.get(futures) + return all(result for result in results if result is not None) + except Exception as e: + print(f"Error during policy preparation: {e}") + return False + + def finish_generation(self, *args: Any, **kwargs: Any) -> bool: + """Sleep workers and reset prefix cache.""" + try: + # Choose the appropriate method based on setting + # non-colocated only needs reset prefix cache, no need to sleep. + if self.cfg["colocated"]["enabled"]: + method_name = ( + "sleep_async" if self.cfg["vllm_cfg"]["async_engine"] else "sleep" + ) + else: + method_name = ( + "reset_prefix_cache_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "reset_prefix_cache" + ) + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + # Wait for all futures to complete + results = ray.get(futures) + return all(result for result in results if result is not None) + except Exception as e: + print(f"Error during policy preparation: {e}") + return False + + def shutdown(self) -> bool: + """Shut down all vLLM workers and clean up resources.""" + try: + # Use the worker group's shutdown method with the worker's cleanup method + return self.worker_group.shutdown(cleanup_method="shutdown") + except Exception as e: + print(f"Error during policy shutdown: {e}") + return False + + def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None: + """Prepare the info for refit.""" + # Choose the appropriate method based on async_engine setting + method_name = ( + "prepare_refit_info_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "prepare_refit_info" + ) + + # Use run_all_workers_single_data to send data to all workers + futures = self.worker_group.run_all_workers_single_data( + method_name, + state_dict_info=state_dict_info, + run_rank_0_only_axes=["tensor_parallel", 
"pipeline_parallel"], + ) + + # Wait for all futures to complete + ray.get(futures) + + def update_weights_via_ipc_zmq(self) -> list[ray.ObjectRef]: + """Update weights of the policy using IPC handles via ZMQ socket.""" + if not self.worker_group or not self.worker_group.workers: + raise RuntimeError("Worker group is not initialized") + + # Choose the appropriate method based on async_engine setting + method_name = ( + "update_weights_via_ipc_zmq_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "update_weights_via_ipc_zmq" + ) + + # Use run_all_workers_single_data since no data needs to be passed + futures = self.worker_group.run_all_workers_single_data( + method_name, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + + # this function should co-work with lm_policy, so we should wait for all futures to complete outside + return futures + + def update_weights_from_collective(self) -> list[ray.ObjectRef]: + """Update weights of the policy using collective communication.""" + if not self.worker_group or not self.worker_group.workers: + raise RuntimeError("Worker group is not initialized") + + # Choose the appropriate method based on async_engine setting + method_name = ( + "update_weights_from_collective_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "update_weights_from_collective" + ) + + # Use run_all_workers_single_data for methods that don't need data + futures = self.worker_group.run_all_workers_single_data( + method_name, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + + # this function should co-work with lm_policy, so we should wait for all futures to complete outside + return futures + + def start_gpu_profiling(self) -> None: + """Start GPU profiling.""" + futures = self.worker_group.run_all_workers_single_data("start_gpu_profiling") + ray.get(futures) + + def stop_gpu_profiling(self) -> None: + """Stop GPU profiling.""" + futures = self.worker_group.run_all_workers_single_data("stop_gpu_profiling") 
+ ray.get(futures) + + def __del__(self) -> None: + """Shuts down the worker groups when the object is deleted or is garbage collected. + + This is an extra safety net in case the user forgets to call shutdown() and the pointer to + the object is lost due to leaving a function scope. It's always recommended that the + user calls shutdown(). + """ + self.shutdown() + + def invalidate_kv_cache(self) -> bool: + """Invalidate reusable caches in vLLM (e.g., prefix/KV cache) after weight updates. + + For async_engine, calls reset_prefix_cache_async on workers. For sync, calls reset_prefix_cache. + Returns True if all workers report success. + """ + try: + method_name = ( + "reset_prefix_cache_async" + if self.cfg["vllm_cfg"]["async_engine"] + else "reset_prefix_cache" + ) + futures = self.worker_group.run_all_workers_single_data( + method_name, + run_rank_0_only_axes=["tensor_parallel", "pipeline_parallel"], + ) + results = ray.get(futures) + return all(result for result in results if result is not None) + except Exception as e: + print(f"Error invalidating vLLM caches: {e}") + return False diff --git a/nemo_rl/models/generation/vllm/vllm_worker.py b/nemo_rl/models/generation/vllm/vllm_worker.py new file mode 100644 index 0000000000..a97d68e669 --- /dev/null +++ b/nemo_rl/models/generation/vllm/vllm_worker.py @@ -0,0 +1,770 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import copy +import gc +import os +import sys +from typing import Any, Optional, cast + +import ray +import torch + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches +from nemo_rl.models.generation.interfaces import ( + GenerationDatumSpec, + GenerationOutputSpec, + verify_right_padding, +) +from nemo_rl.models.generation.vllm.config import VllmConfig +from nemo_rl.models.generation.vllm.utils import format_prompt_for_vllm_generation +from nemo_rl.models.huggingface.common import ModelFlag +from nemo_rl.models.policy.utils import is_vllm_v1_engine_enabled +from nemo_rl.utils.nsys import wrap_with_nvtx_name + + +# Use a base class to share some functions to avoid code duplication. +class BaseVllmGenerationWorker: + def __repr__(self) -> str: + """Customizes the actor's prefix in the Ray logs. + + This makes it easier to identify which worker is producing specific log messages. + """ + return f"{self.__class__.__name__}" + + @staticmethod + def configure_worker( + num_gpus: int | float, bundle_indices: Optional[tuple[int, list[int]]] = None + ) -> tuple[dict[str, Any], dict[str, str], dict[str, Any]]: + """Provides complete worker configuration for vLLM tensor and pipeline parallelism. + + This method configures the worker based on its role in tensor and pipeline parallelism, + which is determined directly from the bundle_indices parameter. 
+ + Args: + num_gpus: Original GPU allocation for this worker based on the placement group + bundle_indices: Tuple of (node_idx, local_bundle_indices) for parallelism (if applicable) + + Returns: + tuple with complete worker configuration: + - 'resources': Resource allocation (e.g., num_gpus) + - 'env_vars': Environment variables for this worker + - 'init_kwargs': Parameters to pass to __init__ of the worker + """ + # Initialize configuration + resources: dict[str, Any] = {"num_gpus": num_gpus} + init_kwargs: dict[str, Any] = {} + env_vars: dict[str, str] = {} + + local_bundle_indices = None + if bundle_indices is not None: + node_idx = bundle_indices[0] + local_bundle_indices = bundle_indices[1] + init_kwargs["bundle_indices"] = local_bundle_indices + + """ + compute a unique seed from the node_idx and bundle_indices: + node_idx = 0, bundle_indices = [0, 1, 2, 3] -> seed = 0*1024 + 0 + node_idx = 0, bundle_indices = [4, 5, 6, 7] -> seed = 0*1024 + 1 + node_idx = 1, bundle_indices = [0, 1, 2, 3] -> seed = 1*1024 + 0 + node_idx = 1, bundle_indices = [4, 5, 6, 7] -> seed = 1*1024 + 1 + """ + # For single worker groups, use a simpler seed calculation + if len(local_bundle_indices) == 1: + seed = node_idx * 1024 + local_bundle_indices[0] + else: + # For parallel groups, use the original calculation + bundle_id = local_bundle_indices[0] // len(local_bundle_indices) + seed = node_idx * 1024 + bundle_id + + init_kwargs["seed"] = seed + # Need to give each DP group its own vllm cache to address: + # https://github.com/vllm-project/vllm/issues/18851 + env_vars["VLLM_CACHE_ROOT"] = os.path.expanduser(f"~/.cache/vllm_{seed}") + + # Check if this worker is part of a parallel group (TP or TP+PP). + # A worker is part of a parallel group if it's a secondary member (local_bundle_indices is None) + # or if it's a primary member of a group with multiple workers. 
+ is_part_of_parallel_workers = ( + local_bundle_indices is not None and len(local_bundle_indices) > 1 + ) or local_bundle_indices is None + + if is_part_of_parallel_workers: + # Ray + vllm likes to manage GPU assignment internally for parallel groups + resources["num_gpus"] = 0 + env_vars["RAY_EXPERIMENTAL_NOSET_CUDA_VISIBLE_DEVICES"] = "1" + init_kwargs["fraction_of_gpus"] = num_gpus + + env_vars["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" + # Skip vllm P2P check and rely on driver to report peer to peer capability. + env_vars["VLLM_SKIP_P2P_CHECK"] = "1" + + return resources, env_vars, init_kwargs + + def __init__( + self, + config: VllmConfig, + bundle_indices: Optional[list[int]] = None, + fraction_of_gpus: float = 1.0, + seed: Optional[int] = None, + ): + """Initialize a vLLM worker for distributed inference. + + Args: + config: Configuration dictionary for the policy + bundle_indices: List of local bundle indices within a node for parallelism. + Only needed for the first worker in each tied worker group. 
+ fraction_of_gpus: Fraction of GPUs to use for this worker + seed: Random seed for initialization + """ + self.cfg = config + + self.model_name = self.cfg["model_name"] + self.tensor_parallel_size = self.cfg["vllm_cfg"]["tensor_parallel_size"] + self.pipeline_parallel_size = self.cfg["vllm_cfg"]["pipeline_parallel_size"] + self.expert_parallel_size = self.cfg["vllm_cfg"]["expert_parallel_size"] + self.enable_expert_parallel = self.expert_parallel_size > 1 + self.gpu_memory_utilization = self.cfg["vllm_cfg"]["gpu_memory_utilization"] + self.precision = self.cfg["vllm_cfg"]["precision"] + self.fraction_of_gpus = fraction_of_gpus + self.is_model_owner = bundle_indices is not None + + # Store the Python executable being used by this worker + self.py_executable = sys.executable + + # Skip model loading if we're not the model owner + if not self.is_model_owner: + self.llm = None + self.tokenizer = None + self.rank = 0 + self.world_size = 1 + return + + # In Ray+vLLM setup, each worker process considers itself rank 0 + # vLLM handles the parallelism internally through Ray + self.rank = 0 + self.world_size = 1 + + # Monkey patch for vLLM to ensure RAY_ADDRESS is set in Ray actors. + try: + from vllm.logger import init_logger + + logger = init_logger("vllm_patch") + + def _patch_vllm_init_workers_ray(): + """Patch the vLLM ray_distributed_executor.py file. + + 1. Pass custom runtime_env in _init_workers_ray call. + - This allows passing custom py_executable to worker initialization. + 2. Add NCCL_CUMEM_ENABLE and NCCL_NVLS_ENABLE to vLLM ADDITIONAL_ENV_VARS. + - This is a workaround to fix async vllm in some scenarios. + - See https://github.com/NVIDIA-NeMo/RL/pull/898 for more details. 
+ """ + try: + import vllm.executor.ray_distributed_executor as ray_executor_module + + file_to_patch = ray_executor_module.__file__ + + with open(file_to_patch, "r") as f: + content = f.read() + + old_lines = [ + "self._init_workers_ray(placement_group)", + 'ADDITIONAL_ENV_VARS = {"HF_TOKEN", "HUGGING_FACE_HUB_TOKEN"}', + ] + + new_lines = [ + f'self._init_workers_ray(placement_group, runtime_env={{"py_executable": "{self.py_executable}"}})', + 'ADDITIONAL_ENV_VARS = {"HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "NCCL_CUMEM_ENABLE", "NCCL_NVLS_ENABLE", "RAY_ENABLE_UV_RUN_RUNTIME_ENV"}', + ] + + need_replace = False + for old_line, new_line in zip(old_lines, new_lines): + if new_line in content or old_line not in content: + continue + content = content.replace(old_line, new_line) + need_replace = True + + if not need_replace: + return + + # Write back the patched content + with open(file_to_patch, "w") as f: + f.write(content) + + except (ImportError, FileNotFoundError, PermissionError): + # Allow failures gracefully + pass + + _patch_vllm_init_workers_ray() + logger.info("Successfully patched vllm _init_workers_ray.") + + except (ImportError, AttributeError): + # vllm not installed or has a different structure, skipping patch. + pass + + try: + import vllm + + self.SamplingParams = vllm.SamplingParams + except ImportError: + raise ImportError( + "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " + "covers the vllm dependency. You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " + "This error can also happen if the venv creation was aborted or errored out in the middle. In that case, " + "please run at least once with the environment variable NRL_FORCE_REBUILD_VENVS=true set to force the rebuild of the environment." 
+ ) + vllm_kwargs: dict[str, Any] = copy.deepcopy(self.cfg.get("vllm_kwargs", {})) + + # Calculate total parallel size (TP * PP) + model_parallel_size = self.tensor_parallel_size * self.pipeline_parallel_size + + # Special handling for parallel case (either TP or PP or both) + if model_parallel_size > 1: + # Configure vLLM for tensor/pipeline parallelism within Ray + # Reset CUDA_VISIBLE_DEVICES to allow vLLM to manage GPU assignment + os.environ.pop("CUDA_VISIBLE_DEVICES", None) + os.environ["VLLM_RAY_PER_WORKER_GPUS"] = str( + self.fraction_of_gpus / model_parallel_size + ) + + # Set bundle indices for parallel workers + bundle_indices_str = ",".join(map(str, bundle_indices)) + os.environ["VLLM_RAY_BUNDLE_INDICES"] = bundle_indices_str + print( + f"VLLM_RAY_BUNDLE_INDICES environment variable set to: {os.environ.get('VLLM_RAY_BUNDLE_INDICES')}" + ) + + # Use Ray for distributed execution in parallel mode + vllm_kwargs["distributed_executor_backend"] = "ray" + else: + # For non-parallel mode, explicitly set executor to None to avoid Ray issues + vllm_kwargs["distributed_executor_backend"] = None + + os.environ["VLLM_USE_V1"] = "1" if is_vllm_v1_engine_enabled() else "0" + os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1" + + # We should use vLLM DP if ep_size > tp_size since EP_SIZE = DP_SIZE * TP_SIZE in vLLM. 
+ # See details in https://github.com/vllm-project/vllm/blob/main/examples/offline_inference/data_parallel.py + if self.expert_parallel_size > self.tensor_parallel_size: + # set vLLM DP rank + world_size = int(os.environ["VLLM_DP_SIZE"]) * model_parallel_size + rank = int(os.environ["RANK"]) % world_size + os.environ["VLLM_DP_RANK"] = str(rank // model_parallel_size) + os.environ["VLLM_DP_RANK_LOCAL"] = str((rank % 8) // model_parallel_size) + # set vLLM DP address and port + leader_rank = int(os.environ["RANK"]) // world_size * world_size + addr_list = eval(os.environ["AVAILABLE_ADDR_LIST"]) + port_list = eval(os.environ["AVAILABLE_PORT_LIST"]) + os.environ["VLLM_DP_MASTER_IP"] = addr_list[leader_rank] + os.environ["VLLM_DP_MASTER_PORT"] = str(port_list[leader_rank]) + + load_format = self.cfg["vllm_cfg"]["load_format"] + if ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(self.model_name): + load_format = "auto" + + if ( + len(get_nsight_config_if_pattern_matches("vllm_generation_worker")) > 0 + and vllm_kwargs["distributed_executor_backend"] == "ray" + ): + logger.warning( + "Nsight profiling is enabled for vllm generation worker through the vllm ray distributed executor. " + "The nsight command-line args and output file names are automatically picked by the ray distributed " + "executor. Refer to https://github.com/vllm-project/vllm/blob/7e3a8dc90670fd312ce1e0d4eba9bf11c571e3ad/vllm/executor/ray_distributed_executor.py#L136 " + "for more information." 
+ ) + vllm_kwargs["ray_workers_use_nsight"] = True + + if self.cfg["vllm_cfg"]["precision"] == "fp8": + from nemo_rl.models.generation.fp8 import init_fp8 + + fp8_kwargs = init_fp8( + self.cfg["vllm_cfg"], self.model_name, model_parallel_size + ) + vllm_kwargs.update(fp8_kwargs) + # overriden by quant config, however vllm complains if this not passed + self.precision = "bfloat16" + + if not isinstance(vllm_kwargs.get("hf_overrides"), dict): + vllm_kwargs["hf_overrides"] = {} + vllm_kwargs["hf_overrides"].update( + self.cfg["vllm_cfg"].get("hf_overrides", {}) or {} + ) + + llm_kwargs = dict( + model=self.model_name, + served_model_name=self.model_name, + load_format=load_format, + # Set in nemo_rl.models.generation.configure_generation_config + skip_tokenizer_init=self.cfg["vllm_cfg"]["skip_tokenizer_init"], + tensor_parallel_size=self.tensor_parallel_size, + pipeline_parallel_size=self.pipeline_parallel_size, + enable_expert_parallel=self.enable_expert_parallel, + gpu_memory_utilization=self.gpu_memory_utilization, + enable_prefix_caching=torch.cuda.get_device_capability()[0] >= 8, + dtype=self.precision, + seed=seed, + enforce_eager=self.cfg["vllm_cfg"]["enforce_eager"], + max_model_len=self.cfg["vllm_cfg"]["max_model_len"], + trust_remote_code=True, + worker_extension_cls="nemo_rl.models.generation.vllm.vllm_backend.VllmInternalWorkerExtension", + enable_sleep_mode=True, + disable_log_stats=True, + logprobs_mode="processed_logprobs", + **vllm_kwargs, + ) + + self._create_engine(llm_kwargs) + + # will be initialized in post_init + # used in update_weights_from_ipc_handles + self.vllm_device_ids = None + + def llm(self): + return self.llm + + def is_alive(self): + """Check if the worker is alive.""" + return True + + def _merge_stop_strings(self, batch_stop_strings): + stop_set: set[str] = set() + + if self.cfg.get("stop_strings"): + stop_set.update(self.cfg["stop_strings"]) + + if batch_stop_strings is not None: + for sample_ss in batch_stop_strings: + if 
sample_ss: + stop_set.update(sample_ss) + + return list(stop_set) if stop_set else None + + def _build_sampling_params( + self, + *, + greedy: bool, + stop_strings, + max_new_tokens: Optional[int] = None, + ): + top_k_cfg = self.cfg["top_k"] + top_k_val = 1 if greedy else (top_k_cfg if top_k_cfg is not None else -1) + + temperature = 0.0 if greedy else self.cfg["temperature"] + + max_tokens = ( + max_new_tokens if max_new_tokens is not None else self.cfg["max_new_tokens"] + ) + + return self.SamplingParams( + temperature=temperature, + top_p=self.cfg["top_p"], + top_k=top_k_val, + max_tokens=max_tokens, + logprobs=0, + stop_token_ids=self.cfg["stop_token_ids"], + stop=stop_strings, + include_stop_str_in_output=True, + ) + + def start_gpu_profiling(self) -> None: + """Start GPU profiling.""" + torch.cuda.profiler.start() + if self.llm is not None: + self.llm.collective_rpc("start_gpu_profiling", args=tuple()) + + def stop_gpu_profiling(self) -> None: + """Stop GPU profiling.""" + torch.cuda.profiler.stop() + if self.llm is not None: + self.llm.collective_rpc("stop_gpu_profiling", args=tuple()) + + +@ray.remote( + runtime_env={**get_nsight_config_if_pattern_matches("vllm_generation_worker")} +) # pragma: no cover +class VllmGenerationWorker(BaseVllmGenerationWorker): + def _create_engine(self, llm_kwargs: dict[str, Any]) -> None: + import vllm + + self.llm = vllm.LLM(**llm_kwargs) + + def post_init(self): + self.vllm_device_ids = self.report_device_id() + + def init_collective( + self, + rank_prefix: int, + ip: str, + port: int, + world_size: int, + train_world_size: int, + ) -> None: + self.llm.collective_rpc( + "init_collective", + args=( + rank_prefix, + ip, + port, + world_size, + train_world_size, + ), + ) + + @wrap_with_nvtx_name("vllm_genertion_worker/generate") + def generate( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> BatchedDataDict[GenerationOutputSpec]: + """Generate a batch of data using vLLM generation. 

        Args:
            data: BatchedDataDict containing input_ids and input_lengths tensors
            greedy: Whether to use greedy decoding instead of sampling

        Returns:
            BatchedDataDict conforming to GenerationOutputSpec:
                - output_ids: input + generated token IDs with proper padding
                - logprobs: Log probabilities for tokens
                - generation_lengths: Lengths of each response
                - unpadded_sequence_lengths: Lengths of each input + generated sequence
        """
        # Handle empty input case
        if len(data["input_ids"]) == 0:
            # Return empty BatchedDataDict with all required fields
            return BatchedDataDict[GenerationOutputSpec](
                {
                    "output_ids": torch.zeros((0, 0), dtype=torch.long),
                    "logprobs": torch.zeros((0, 0), dtype=torch.float),
                    "generation_lengths": torch.zeros(0, dtype=torch.long),
                    "unpadded_sequence_lengths": torch.zeros(0, dtype=torch.long),
                }
            )

        input_ids = data["input_ids"]
        input_lengths = data["input_lengths"]
        batch_stop_strings: list[list[str]] = data.get("stop_strings", [])
        stop_strings = self._merge_stop_strings(batch_stop_strings)
        sampling_params = self._build_sampling_params(
            greedy=greedy,
            stop_strings=stop_strings,
        )

        # verify inputs have correct padding
        verify_right_padding(data, pad_value=self.cfg["_pad_token_id"])

        # Original input length with padding
        padded_input_length = input_ids.size(1)

        # Convert inputs to vLLM format
        prompts = format_prompt_for_vllm_generation(data)

        # Generate outputs
        assert self.llm is not None, (
            "Attempting to generate with either an uninitialized vLLM or non-model-owner"
        )
        outputs = self.llm.generate(prompts, sampling_params)

        # Process the outputs. NOTE(review): rows are re-packed rather than
        # padding-preserving — each row keeps its unpadded prompt prefix
        # (input_lengths[i] tokens) and the generated tokens are appended
        # immediately after it; everything beyond that is pad. The earlier
        # comment here claimed the original input padding structure was
        # preserved, which is not what the code below does.
        output_ids_list = []
        logprobs_list = []
        generation_lengths = []
        unpadded_sequence_lengths = []
        # Longest generation in the batch determines the shared output width
        max_length = 0
        for output in outputs:
            max_length = max(max_length, len(output.outputs[0].token_ids))

        for i, output in enumerate(outputs):
            # Extract generated tokens
            sequence_length = input_lengths[i]
            generation = output.outputs[0]
            generated_tokens = list(generation.token_ids)

            # Calculate total sequence length (original input length + generated tokens)
            total_length = padded_input_length + max_length

            # Create a new tensor with the right size and fill with padding token
            full_output = torch.full(
                (total_length,), self.cfg["_pad_token_id"], dtype=input_ids.dtype
            )

            # Copy original input (with padding) into the beginning
            full_output[:sequence_length] = input_ids[i][:sequence_length]

            # Add generated tokens after the original input
            full_output[sequence_length : sequence_length + len(generated_tokens)] = (
                torch.tensor(generated_tokens)
            )

            output_ids_list.append(full_output)
            # Per-token logprobs aligned with output_ids; prompt positions stay 0.0
            full_logprobs = torch.zeros(total_length, dtype=torch.float32)
            if hasattr(generation, "logprobs") and generation.logprobs:
                try:
                    for idx, logprob_dict in enumerate(generation.logprobs):
                        if logprob_dict:
                            position = sequence_length + idx
                            # Each entry maps token_id -> Logprob; take the sampled token's value
                            full_logprobs[position] = next(iter(logprob_dict.items()))[
                                1
                            ].logprob
                except Exception:
                    import traceback

                    traceback.print_exc()

            logprobs_list.append(full_logprobs)

            response_length = sequence_length + len(generated_tokens)
            generation_lengths.append(len(generated_tokens))
            unpadded_sequence_lengths.append(response_length)
            assert response_length <= self.llm.llm_engine.model_config.max_model_len, (
                f"response_length={response_length} > max_model_len={self.llm.llm_engine.model_config.max_model_len}, which should not happen. Please check this behavior in isolation by running `uv run --extra vllm tools/model_diagnostics/1.max_model_len_respected.py {self.llm.llm_engine.model_config.model}` and raise this issue with the vllm team."
            )

        # Create return data conforming to GenerationOutputSpec
        output_ids = torch.stack(output_ids_list)
        logprobs = torch.stack(logprobs_list)

        return_data = BatchedDataDict[GenerationOutputSpec](
            {
                "output_ids": output_ids,
                "logprobs": logprobs,
                "generation_lengths": torch.tensor(
                    generation_lengths, dtype=torch.long
                ),
                "unpadded_sequence_lengths": torch.tensor(
                    unpadded_sequence_lengths, dtype=torch.long
                ),
            }
        )

        return return_data

    @wrap_with_nvtx_name("vllm_genertion_worker/generate_text")
    def generate_text(
        self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False
    ) -> BatchedDataDict[GenerationOutputSpec]:
        """Generate text responses using vLLM generation.

        Args:
            data: BatchedDataDict containing prompts with text strings
            greedy: Whether to use greedy decoding instead of sampling

        Returns:
            BatchedDataDict containing:
                - texts: List of generated text responses
        """
        # Check if async engine is enabled
        if self.cfg["vllm_cfg"]["async_engine"]:
            raise RuntimeError(
                "generate_text cannot be used with async_engine=True. Use generate_text_async instead."
            )

        # Extract stop_strings if provided, else use default from config
        batch_stop_strings: list[list[str] | None] = data.get(
            "stop_strings", [self.cfg.get("stop_strings")] * len(data["prompts"])
        )

        # This function requires all generations have the same stop strings, so we collect all here
        stop_strings: set[str] = set()
        for sample_stop_strings in batch_stop_strings:
            if sample_stop_strings:
                stop_strings.update(sample_stop_strings)

        # Add default stop strings from config
        if self.cfg.get("stop_strings", None):
            stop_strings.update(self.cfg["stop_strings"])

        stop_strings = list(stop_strings) if len(stop_strings) > 0 else None

        # Read generation parameters from config
        top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1
        sampling_params = self.SamplingParams(
            temperature=self.cfg["temperature"] if not greedy else 0,
            top_p=self.cfg["top_p"],
            top_k=top_k if not greedy else 1,
            max_tokens=self.cfg["max_new_tokens"],
            stop_token_ids=self.cfg["stop_token_ids"],
            stop=stop_strings,
            include_stop_str_in_output=True,  # returning stop strings like hf
        )

        # Generate outputs
        assert self.llm is not None, (
            "Attempting to generate with either an uninitialized vLLM or non-model-owner"
        )
        outputs = self.llm.generate(data["prompts"], sampling_params)
        texts = [output.outputs[0].text for output in outputs]

        # Convert to BatchedDataDict
        return_data: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict(
            {"texts": texts}
        )
        return return_data

    def report_device_id(self) -> list[str]:
        """Report device ID from the vLLM worker."""
        assert self.llm is not None, (
            "Attempting to report device id with either an uninitialized vLLM or non-model-owner"
        )

        if self.cfg["vllm_cfg"]["async_engine"]:
            raise RuntimeError(
                "report_device_id cannot be used with async_engine=True. Use report_device_id_async instead."
            )

        list_of_worker_results = self.llm.collective_rpc(
            "report_device_id", args=tuple()
        )
        return cast(list[str], list_of_worker_results)

    def prepare_refit_info(self, state_dict_info: dict[str, Any]) -> None:
        """Prepare the info for refit."""
        self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,))

    @wrap_with_nvtx_name("vllm_genertion_worker/update_weights_via_ipc_zmq")
    def update_weights_via_ipc_zmq(self) -> bool:
        """Update weights from IPC handles via ZMQ socket."""
        try:
            assert self.llm is not None, (
                "Attempting to update weights with either an uninitialized vLLM or non-model-owner"
            )

            if self.cfg["vllm_cfg"]["async_engine"]:
                raise RuntimeError(
                    "update_weights_via_ipc_zmq cannot be used with async_engine=True. Use update_weights_via_ipc_zmq_async instead."
                )

            result_or_coro = self.llm.collective_rpc(
                "update_weights_via_ipc_zmq",
                args=tuple(),
            )
            # Only the first worker's result is inspected here
            worker_result = result_or_coro[0]

            if not worker_result:
                print(
                    f"Error: Worker failed to update weights. Result: {worker_result}"
                )
                return False
            return True
        except Exception as e:
            print(f"Exception during collective_rpc for weight update: {e}")
            import traceback

            traceback.print_exc()
            return False

    @wrap_with_nvtx_name("vllm_genertion_worker/update_weights_from_collective")
    def update_weights_from_collective(self) -> bool:
        """Update the model weights from collective communication."""
        try:
            assert self.llm is not None, (
                "Attempting to update weights with either an uninitialized vLLM or non-model-owner"
            )

            if self.cfg["vllm_cfg"]["async_engine"]:
                raise RuntimeError(
                    "update_weights_from_collective can only be used with async_engine=False. Use update_weights_from_collective_async instead."
                )

            result_or_coro = self.llm.collective_rpc(
                "update_weights_from_collective", args=tuple()
            )
            # Only the first worker's result is inspected here
            worker_result = result_or_coro[0]

            if not worker_result:
                print(
                    f"Error: Worker failed to update weights. Result: {worker_result}"
                )
                return False
            return True
        except Exception as e:
            print(f"Exception during collective_rpc for weight update: {e}")
            import traceback

            traceback.print_exc()
            return False

    def reset_prefix_cache(self):
        """Reset the prefix cache of vLLM engine."""
        assert self.llm is not None, (
            "Attempting to reset prefix cache with either an uninitialized vLLM or non-model-owner"
        )

        if self.cfg["vllm_cfg"]["async_engine"]:
            raise RuntimeError(
                "reset_prefix_cache can only be used with async_engine=False. Use reset_prefix_cache_async instead."
            )

        self.llm.llm_engine.reset_prefix_cache()
        gc.collect()
        torch.cuda.empty_cache()

    def sleep(self):
        """Put the vLLM engine to sleep."""
        assert self.llm is not None, (
            "Attempting to sleep with either an uninitialized vLLM or non-model-owner"
        )

        if self.cfg["vllm_cfg"]["async_engine"]:
            raise RuntimeError(
                "sleep cannot be used with async_engine=True. Use sleep_async instead."
            )

        # Reset the prefix cache to ensure that prefix cache is not reused after weights are updated
        self.llm.llm_engine.reset_prefix_cache()
        self.llm.sleep(level=1)

        gc.collect()
        torch.cuda.empty_cache()

    def wake_up(self, **kwargs):
        """Wake up the vLLM engine."""
        assert self.llm is not None, (
            "Attempting to wake up with either an uninitialized vLLM or non-model-owner"
        )

        if self.cfg["vllm_cfg"]["async_engine"]:
            raise RuntimeError(
                "wake_up cannot be used with async_engine=True. Use wake_up_async instead."
            )

        tags = kwargs.get("tags")

        # Only forward `tags` when explicitly provided
        wake_up_args = {}
        if tags is not None:
            wake_up_args["tags"] = tags

        self.llm.wake_up(**wake_up_args)

    def shutdown(self) -> bool:
        """Clean up vLLM resources."""
        try:
            if self.llm is not None:
                # Clean up extension resources (e.g., ZMQ sockets)
                self.llm.collective_rpc("cleanup", args=tuple())

                # Explicitly delete the engine. This may trigger its __del__ method.
                del self.llm

            self.llm = None
            self.tokenizer = None

            # Force garbage collection
            gc.collect()
            torch.cuda.empty_cache()

            return True
        except Exception as e:
            print(f"Error during vLLM shutdown: {e}")
            return False
diff --git a/nemo_rl/models/generation/vllm/vllm_worker_async.py b/nemo_rl/models/generation/vllm/vllm_worker_async.py
new file mode 100644
index 0000000000..d4e8161b44
--- /dev/null
+++ b/nemo_rl/models/generation/vllm/vllm_worker_async.py
@@ -0,0 +1,1003 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+ +import asyncio +import gc +import threading +import uuid +from typing import Any, AsyncGenerator, Optional, cast + +import ray +import torch +import uvicorn +from fastapi import FastAPI + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import _get_free_port_local, _get_node_ip_local +from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches +from nemo_rl.models.generation.interfaces import ( + GenerationDatumSpec, + GenerationOutputSpec, + verify_right_padding, +) +from nemo_rl.models.generation.vllm.utils import format_prompt_for_vllm_generation +from nemo_rl.models.generation.vllm.vllm_worker import BaseVllmGenerationWorker + + +def _replace_prefix_tokens( + tokenizer, + model_prefix_token_ids: list[int], + template_prefix_token_ids: list[int], + template_token_ids: list[int], +) -> list[int]: + """This is a subroutine used inside the vLLM Chat Completion server. + + This function is for fixing up the chat template-tokenized messages history + to match the model output tokenization up to the last assistant turn, + in order to preserve the monotonic tokens property for optimized multi-turn + training. + + Some environments (namely Penguin) require an OpenAI compatible server + endpoint rather than an inference engine handle. This is fine for the most + part, but it may cause issues when the environment is used as a part of + training. + + RL training frameworks train models on token IDs, but the OpenAI compatible + server communicates in what is basically de-tokenized text. When multiple + model calls are made to the OpenAI compatible server in a single trajectory, + model generations in previous model calls may be re-tokenized to something + that is different than what was generated. 
This is not too big of an issue + (that we know of) at inference time, but the log probs the model produces + are different enough for the differently re-tokenized generation result that + it causes the training to be off policy. Off policy isn't necessarily a bad + thing in isolation, but this source of off-policyness may cause unexpected + issues if not properly accounted for. It also mis-aligns the token ID + sequences across model calls, which feels very strange during training. + + There are real cases where the model output string _does not match_ the chat + template tokenization of the parsed model output. A concrete example is + inconsistent whitespace tokens around tool call special tokens. + + TODO When NeMo RL supports training image generation models, we want to + revisit and possibly update this function. This issue occurs when the model + generates tokens that are de-tokenized into text or images, and then + re-tokenized into tokens. So if there is a situation like that with images + and image tokenization is non-unique, then we will need to uppdate this + function. + + Example (turn-by-turn, concise; eos_token_id = 2): + Turn 1: + - prefill_T1 (template prefill) = [11,12,13,40,41] + - model output = [220,17,2] # decodes to " 4" + EOS + - model_prefix_token_ids = prefill_T1 + model output + => [11,12,13,40,41,220,17,2] + + Turn 2 (template retokenizes prior assistant text differently): + - template_prefix_token_ids = [11,12,13,40,41,1001,2] # 1001 decodes to " 4" + - template_token_ids = [11,12,13,40,41,1001,2,21,22,40,41] + + _replace_prefix_tokens keeps the exact prior model tokens up to EOS and + resumes from the template after that EOS: + output => [11,12,13,40,41,220,17,2,21,22,40,41] + """ + if not model_prefix_token_ids: + return template_token_ids + + eos_token_id = tokenizer.eos_token_id + assert eos_token_id is not None, "Your tokenizer must have an EOS token ID!" 
+
+    model_cut_end = len(model_prefix_token_ids)
+    if model_prefix_token_ids:
+        # We are not always guaranteed that the model outputs an EOS token as the stop criteria of the previous model call e.g. when the model reaches max_tokens.
+        # And since chat templates will always add one for us, we just cut the model input to right before the EOS token ID (if applicable)
+        if model_prefix_token_ids[-1] == eos_token_id:
+            model_cut_end -= 1
+
+    # We take everything starting with the EOS token ID.
+    template_cut_start = -1
+    for pos in reversed(range(len(template_prefix_token_ids))):
+        if template_token_ids[pos] == eos_token_id:
+            template_cut_start = pos
+            break
+
+    # This should never be the case, but we assert defensively so a tokenizer/template mismatch fails loudly here instead of silently mis-aligning token sequences downstream.
+    assert template_cut_start >= 0, (
+        "No EOS token ID found in the chat-templated messages!"
+    )
+
+    return (
+        model_prefix_token_ids[:model_cut_end] + template_token_ids[template_cut_start:]
+    )
+
+
+@ray.remote(
+    runtime_env={**get_nsight_config_if_pattern_matches("vllm_async_generation_worker")}
+)  # pragma: no cover
+class VllmAsyncGenerationWorker(BaseVllmGenerationWorker):
+    def _create_engine(self, llm_kwargs: dict[str, Any]) -> None:
+        from vllm.config import CompilationConfig
+        from vllm.engine.arg_utils import AsyncEngineArgs
+        from vllm.v1.engine.async_llm import AsyncLLM
+
+        # (TODO: zhiyul) Remove this workaround after upgrading vLLM where the compilation_config passing issue is resolved.
+ if llm_kwargs.get("compilation_config", None): + llm_kwargs["compilation_config"] = CompilationConfig( + **llm_kwargs["compilation_config"] + ) + + self.llm_async_engine_args = AsyncEngineArgs(**llm_kwargs) + self.llm = AsyncLLM.from_engine_args(self.llm_async_engine_args) + + self.server_thread, self.base_url, self.http_server = None, None, None + if self.cfg["vllm_cfg"].get("expose_http_server"): + self.server_thread, self.base_url, self.http_server = ( + self._setup_vllm_server() + ) + + async def post_init_async(self): + self.vllm_device_ids = await self.report_device_id_async() + + async def report_dp_openai_server_base_url(self) -> Optional[str]: + return self.base_url + + def _setup_vllm_openai_api_server(self, app: FastAPI) -> FastAPI: + from copy import deepcopy + from logging import Filter as LoggingFilter + from logging import LogRecord + from typing import List, Optional, Union + + from fastapi import Request + from fastapi.responses import JSONResponse, StreamingResponse + from vllm.entrypoints.openai.api_server import ( + BaseModelPath, + OpenAIServingChat, + OpenAIServingModels, + OpenAIServingTokenization, + ) + from vllm.entrypoints.openai.protocol import ( + ChatCompletionRequest, + ChatCompletionResponse, + ErrorResponse, + TokenizeChatRequest, + TokenizeCompletionRequest, + TokenizeResponse, + ) + from vllm.v1.engine.async_llm import logger as vllm_async_llm_logger + + engine_client = self.llm + model_config = self.llm_async_engine_args.create_model_config() + base_model_paths = [ + BaseModelPath( + name=model_config.served_model_name, model_path=model_config.model + ), + BaseModelPath(name=model_config.model, model_path=model_config.model), + ] + + openai_serving_models = OpenAIServingModels( + engine_client=engine_client, + model_config=model_config, + base_model_paths=base_model_paths, + lora_modules=None, + ) + + class NeMoRLOpenAIChatRequestMixin: + def model_post_init(self, context): + # Penguin specific processing. 
This is just how Penguin returns the extra token information. + if self.required_prefix_token_ids is None: + for message in reversed(self.messages): + if "prompt_token_ids" in message: + self.required_prefix_token_ids = ( + message["prompt_token_ids"] + + message["generation_token_ids"] + ) + break + + return super().model_post_init(context) + + class NeMoRLOpenAIServingMixin: + async def _preprocess_chat( + self, + request: NeMoRLOpenAIChatRequestMixin, + tokenizer, + messages, + chat_template, + chat_template_content_format, + add_generation_prompt=True, + continue_final_message=False, + tool_dicts=None, + documents=None, + chat_template_kwargs=None, + tool_parser=None, + add_special_tokens=False, + ): + # Materialize the message tool calls so we can deepcopy below. + for message in messages: + if message.get("tool_calls"): + message["tool_calls"] = list(message["tool_calls"]) + + # Deepcopy messages here since _preprocess_chat may be destructive. + messages_for_replace_prefix_tokens = deepcopy(messages) + + # res is conversation, [request_prompt], [engine_prompt] + res = await super()._preprocess_chat( + request, + tokenizer, + messages, + chat_template, + chat_template_content_format, + add_generation_prompt, + continue_final_message, + tool_dicts, + documents, + chat_template_kwargs, + tool_parser, + add_special_tokens, + ) + + if request.required_prefix_token_ids is None: + return res + + # Find the last assistant message + last_assistant_message_idx = None + for i in reversed(range(len(messages_for_replace_prefix_tokens))): + if messages_for_replace_prefix_tokens[i]["role"] == "assistant": + last_assistant_message_idx = i + break + + if last_assistant_message_idx is None: + # If there's no assistant message, we just use the entire thing. + messages_to_last_assistant_message = ( + messages_for_replace_prefix_tokens + ) + else: + # Include the last assistant message itself. 
+ messages_to_last_assistant_message = ( + messages_for_replace_prefix_tokens[ + : last_assistant_message_idx + 1 + ] + ) + + # Call the actual preprocess chat subroutine so we don't miss anything. Whatever they do is whatever we do since we literally do what they do. + corresponding_res = await super()._preprocess_chat( + request, + tokenizer, + messages_to_last_assistant_message, + chat_template, + chat_template_content_format, + add_generation_prompt=False, + continue_final_message=False, + tool_dicts=tool_dicts, + documents=documents, + chat_template_kwargs=chat_template_kwargs, + tool_parser=tool_parser, + add_special_tokens=add_special_tokens, + ) + actual_corresponding_token_ids = corresponding_res[2][0][ + "prompt_token_ids" + ] + + engine_prompt = res[2][ + 0 + ] # We need to modify engine_prompt.prompt_token_ids + + final_prompt_token_ids = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=request.required_prefix_token_ids, + template_prefix_token_ids=actual_corresponding_token_ids, + template_token_ids=engine_prompt["prompt_token_ids"], + ) + + engine_prompt["prompt_token_ids"] = final_prompt_token_ids + + return res + + ######################################## + # /v1/chat/completions endpoint + ######################################## + + # This MRO is necessary i.e. NeMoRLOpenAIChatRequestMixin > ChatCompletionRequest + class NeMoRLChatCompletionRequest( + NeMoRLOpenAIChatRequestMixin, ChatCompletionRequest + ): + required_prefix_token_ids: Optional[List[int]] = None + + # This MRO is necessary i.e. 
NeMoRLOpenAIServingMixin > OpenAIServingChat + class NeMoRLOpenAIServingChat(NeMoRLOpenAIServingMixin, OpenAIServingChat): + pass + + serving_chat_default_kwargs = dict( + response_role="assistant", + request_logger=None, + chat_template=None, + chat_template_content_format="auto", + ) + serving_chat_kwargs = serving_chat_default_kwargs | self.cfg["vllm_cfg"].get( + "http_server_serving_chat_kwargs", dict() + ) + openai_serving_chat = NeMoRLOpenAIServingChat( + engine_client, + model_config, + openai_serving_models, + return_tokens_as_token_ids=True, + **serving_chat_kwargs, + ) + + generation_config = self.cfg + + # The create_chat_completion and tokenize methods are taken from vllm/entrypoints/openai/api_server.py + @app.post("/v1/chat/completions") + async def create_chat_completion( + request: NeMoRLChatCompletionRequest, raw_request: Request + ): + # This needs to match the behavior in nemo_rl/models/generation/vllm/vllm_worker.py::BaseVllmGenerationWorker::_build_sampling_params + # Right now we explicitly assert set this to -1. + assert request.top_k in (None, -1), ( + f"Top k sampling parameter must be unset, empty, or -1. Got `{request.top_k}`" + ) + request.top_k = -1 + + # The request sampling params need to exactly match those as are set in NeMo RL. + # If they do not match, the inference will be off policy and destroy training stability. 
+ assert request.temperature == generation_config["temperature"] + assert request.top_p == generation_config["top_p"] + + generator = await openai_serving_chat.create_chat_completion( + request, raw_request + ) + + if isinstance(generator, ErrorResponse): + return JSONResponse( + content=generator.model_dump(), status_code=generator.error.code + ) + + elif isinstance(generator, ChatCompletionResponse): + return JSONResponse(content=generator.model_dump()) + + return StreamingResponse(content=generator, media_type="text/event-stream") + + ######################################## + # /tokenize endpoint + ######################################## + + # This MRO is necessary i.e. NeMoRLOpenAIChatRequestMixin > TokenizeRequest + class NeMoRLTokenizeChatRequest( + NeMoRLOpenAIChatRequestMixin, TokenizeChatRequest + ): + required_prefix_token_ids: Optional[List[int]] = None + + NeMoRLTokenizeRequest = Union[ + TokenizeCompletionRequest, NeMoRLTokenizeChatRequest + ] + + # This MRO is necessary i.e. 
NeMoRLOpenAIServingMixin > OpenAIServingTokenization + class NeMoRLOpenAIServingTokenization( + NeMoRLOpenAIServingMixin, OpenAIServingTokenization + ): + pass + + openai_serving_tokenization = NeMoRLOpenAIServingTokenization( + engine_client, + model_config, + openai_serving_models, + request_logger=serving_chat_kwargs["request_logger"], + chat_template=serving_chat_kwargs["chat_template"], + chat_template_content_format=serving_chat_kwargs[ + "chat_template_content_format" + ], + ) + + @app.post("/tokenize") + async def tokenize(request: NeMoRLTokenizeRequest, raw_request: Request): + generator = await openai_serving_tokenization.create_tokenize( + request, raw_request + ) + + if isinstance(generator, ErrorResponse): + return JSONResponse( + content=generator.model_dump(), status_code=generator.error.code + ) + elif isinstance(generator, TokenizeResponse): + return JSONResponse(content=generator.model_dump()) + + ######################################## + # Logging + ######################################## + print( + "Adding a vLLM logging filter so that the logs aren't spammed with `Added request ...` messages. This is to help errors pop up better and filter out noise." + ) + + class NoAddedRequestFilter(LoggingFilter): + def filter(self, record: LogRecord) -> bool: + msg = record.getMessage() + return "Added request" not in msg + + vllm_async_llm_logger.addFilter(NoAddedRequestFilter()) + + return app + + def _setup_vllm_server(self) -> "tuple[threading.Thread, str, uvicorn.Server]": + import threading + from logging import Filter as LoggingFilter + from logging import LogRecord, getLogger + + import uvicorn + from fastapi import FastAPI + + # We initialize the FastAPI app here in case we want to do some generic configuration before the subsequent server inits + # e.g. last-run middleware. 
+ app = FastAPI() + + app = self._setup_vllm_openai_api_server(app) + + ######################################## + # Server spinup + ######################################## + + node_ip = _get_node_ip_local() + free_port = _get_free_port_local() + + base_url = f"http://{node_ip}:{free_port}/v1" + print(f"Starting server on {base_url}") + + config = uvicorn.Config( + app, + host="0.0.0.0", + port=free_port, + ) + server = uvicorn.Server(config=config) + + print( + "Adding a uvicorn logging filter so that the logs aren't spammed with 200 OK messages. This is to help errors pop up better and filter out noise." + ) + + class No200Filter(LoggingFilter): + def filter(self, record: LogRecord) -> bool: + msg = record.getMessage() + return not msg.strip().endswith("200") + + uvicorn_logger = getLogger("uvicorn.access") + uvicorn_logger.addFilter(No200Filter()) + + thread = threading.Thread(target=server.run, daemon=True) + thread.start() + + return thread, base_url, server + + async def init_collective_async( + self, + rank_prefix: int, + ip: str, + port: int, + world_size: int, + train_world_size: int, + ) -> None: + await self.llm.collective_rpc( + "init_collective", + args=( + rank_prefix, + ip, + port, + world_size, + train_world_size, + ), + ) + + async def generate_async( + self, + data: BatchedDataDict[GenerationDatumSpec], + greedy: bool = False, + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate a batch of data using vLLM's AsyncLLMEngine, yielding results as they are ready. + + Args: + data: BatchedDataDict with input_ids and input_lengths + greedy: Whether to use greedy decoding instead of sampling + + Yields: + Tuple of (original_index, BatchedDataDict conforming to GenerationOutputSpec for the single sequence) + """ + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_async can only be used when async_engine is enabled in vLLM config." 
+ ) + + # Handle empty input case + if len(data["input_ids"]) == 0: + return + + verify_right_padding(data, pad_value=self.cfg["_pad_token_id"]) + + input_ids_batch = data["input_ids"] + input_lengths_batch = data["input_lengths"] + batch_size = input_ids_batch.shape[0] + + # Ensure generate_async only receives single samples (batch_size = 1) + assert batch_size == 1, ( + f"generate_async is restricted to handle only single samples, " + f"but received batch_size={batch_size}. Please handle batching outside this method." + ) + + batch_specific_stop_strings_list = data.get( + "stop_strings", [[] for _ in range(batch_size)] + ) + + # Create tasks for each sample in the batch + async def process_single_sample(sample_idx): + """Process a single sample and return the result.""" + current_input_actual_length = input_lengths_batch[sample_idx].item() + prompt = format_prompt_for_vllm_generation(data, sample_idx) + + per_sample_stop_strings = None + if batch_specific_stop_strings_list and sample_idx < len( + batch_specific_stop_strings_list + ): + per_sample_stop_strings = batch_specific_stop_strings_list[sample_idx] + + final_stop_strings_for_sample = self._merge_stop_strings( + [per_sample_stop_strings] if per_sample_stop_strings else None + ) + + remaining_ctx = ( + self.cfg["vllm_cfg"]["max_model_len"] - current_input_actual_length + ) + allowed_new_tokens = max(0, min(self.cfg["max_new_tokens"], remaining_ctx)) + + # Handle case where no tokens can be generated due to length constraints + if allowed_new_tokens == 0: + # Access the input data directly from the function parameters + input_ids_single_row = input_ids_batch[sample_idx] + + # Create output tensors with just the input (no generated tokens) + output_ids_single_item_batched = input_ids_single_row[ + :current_input_actual_length + ].unsqueeze(0) + + logprobs_single_item = torch.zeros( + (1, current_input_actual_length), + dtype=torch.float32, + device=input_ids_single_row.device, + ) + + generation_lengths_tensor 
= torch.tensor( + [0], dtype=torch.long, device=input_ids_single_row.device + ) + + unpadded_sequence_lengths_tensor = torch.tensor( + [current_input_actual_length], + dtype=torch.long, + device=input_ids_single_row.device, + ) + + result_batch = BatchedDataDict[GenerationOutputSpec]( + { + "output_ids": output_ids_single_item_batched, + "logprobs": logprobs_single_item, + "generation_lengths": generation_lengths_tensor, + "unpadded_sequence_lengths": unpadded_sequence_lengths_tensor, + } + ) + + return (sample_idx, result_batch) + + sampling_params_for_request = self._build_sampling_params( + greedy=greedy, + stop_strings=final_stop_strings_for_sample, + max_new_tokens=allowed_new_tokens, + ) + + request_id = str(uuid.uuid4()) + + # Generate using vLLM async engine + vllm_request_generator = self.llm.generate( + prompt=prompt, + sampling_params=sampling_params_for_request, + request_id=request_id, + ) + + # Get the final result from the generator + final_request_output = None + async for req_output in vllm_request_generator: + final_request_output = req_output + + if final_request_output is None: + raise RuntimeError(f"No output received for request {request_id}") + + # Process the output + generation_details = final_request_output.outputs[0] + generated_token_ids = list(generation_details.token_ids) + num_generated_tokens = len(generated_token_ids) + + original_input_ids_single_row = input_ids_batch[sample_idx] + final_output_tensor_len = current_input_actual_length + num_generated_tokens + + # Create output_ids tensor for this single item + output_ids_single_item = torch.full( + (final_output_tensor_len,), + self.cfg["_pad_token_id"], + dtype=original_input_ids_single_row.dtype, + device=original_input_ids_single_row.device, + ) + # Copy original input (up to its actual length) + output_ids_single_item[:current_input_actual_length] = ( + original_input_ids_single_row[:current_input_actual_length] + ) + # Add generated tokens after the actual input + 
output_ids_single_item[ + current_input_actual_length : current_input_actual_length + + num_generated_tokens + ] = torch.tensor( + generated_token_ids, + dtype=original_input_ids_single_row.dtype, + device=original_input_ids_single_row.device, + ) + + # Reshape to (1, seq_len) for BatchedDataDict + output_ids_single_item_batched = output_ids_single_item.unsqueeze(0) + + # Create logprobs tensor for this single item + logprobs_single_item = torch.zeros( + (1, final_output_tensor_len), + dtype=torch.float32, + device=original_input_ids_single_row.device, + ) + if hasattr(generation_details, "logprobs") and generation_details.logprobs: + for idx, logprob_dict_per_token in enumerate( + generation_details.logprobs + ): + if logprob_dict_per_token and idx < len(generated_token_ids): + token_id_at_idx = generated_token_ids[idx] + if token_id_at_idx in logprob_dict_per_token: + logprob_value = logprob_dict_per_token[ + token_id_at_idx + ].logprob + position_in_output_tensor = ( + current_input_actual_length + idx + ) + if position_in_output_tensor < final_output_tensor_len: + logprobs_single_item[0, position_in_output_tensor] = ( + logprob_value + ) + + # Generation lengths + generation_lengths_tensor = torch.tensor( + [num_generated_tokens], + dtype=torch.long, + device=original_input_ids_single_row.device, + ) + + # Unpadded sequence lengths (actual_input + actual_generated) + unpadded_total_length = current_input_actual_length + num_generated_tokens + unpadded_sequence_lengths_tensor = torch.tensor( + [unpadded_total_length], + dtype=torch.long, + device=original_input_ids_single_row.device, + ) + + result_batch = BatchedDataDict[GenerationOutputSpec]( + { + "output_ids": output_ids_single_item_batched, + "logprobs": logprobs_single_item, + "generation_lengths": generation_lengths_tensor, + "unpadded_sequence_lengths": unpadded_sequence_lengths_tensor, + } + ) + + return (sample_idx, result_batch) + + # Create tasks for all samples and yield results as they complete + 
sample_tasks = [ + asyncio.create_task(process_single_sample(i)) for i in range(batch_size) + ] + + # Yield results as they become available + for completed_task in asyncio.as_completed(sample_tasks): + try: + result = await completed_task + yield result + except Exception as e: + # Cancel remaining tasks + for task in sample_tasks: + if not task.done(): + task.cancel() + await asyncio.gather(*sample_tasks, return_exceptions=True) + raise e + + async def generate_text_async( + self, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False + ) -> AsyncGenerator[tuple[int, BatchedDataDict[GenerationOutputSpec]], None]: + """Generate text responses asynchronously, yielding results as they are ready. + + Args: + data: BatchedDataDict containing prompts with text strings + greedy: Whether to use greedy decoding instead of sampling + + Yields: + Tuple of (original_index, BatchedDataDict containing single text response) + """ + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "generate_text_async can only be used when async_engine is enabled in vLLM config." 
+ ) + + # Handle empty input case + if len(data["prompts"]) == 0: + return + + prompts = data["prompts"] + batch_size = len(prompts) + + # Extract stop_strings if provided, else use default from config + batch_stop_strings: list[list[str] | None] = data.get( + "stop_strings", [self.cfg.get("stop_strings")] * batch_size + ) + + # Create tasks for each prompt + async def process_single_prompt(prompt_idx): + """Process a single prompt and return the result.""" + prompt = prompts[prompt_idx] + + # Get stop strings for this specific prompt + per_prompt_stop_strings = None + if batch_stop_strings and prompt_idx < len(batch_stop_strings): + per_prompt_stop_strings = batch_stop_strings[prompt_idx] + + # Merge stop strings + final_stop_strings = self._merge_stop_strings( + [per_prompt_stop_strings] if per_prompt_stop_strings else None + ) + + # Create sampling parameters + top_k = self.cfg["top_k"] if self.cfg["top_k"] is not None else -1 + sampling_params = self.SamplingParams( + temperature=self.cfg["temperature"] if not greedy else 0, + top_p=self.cfg["top_p"], + top_k=top_k if not greedy else 1, + max_tokens=self.cfg["max_new_tokens"], + stop_token_ids=self.cfg["stop_token_ids"], + stop=final_stop_strings, + include_stop_str_in_output=True, # returning stop strings like hf + ) + + request_id = str(uuid.uuid4()) + + # Generate using vLLM async engine + vllm_request_generator = self.llm.generate( + prompt=prompt, + sampling_params=sampling_params, + request_id=request_id, + ) + + # Get the final result from the generator + final_request_output = None + async for req_output in vllm_request_generator: + final_request_output = req_output + + if final_request_output is None: + raise RuntimeError(f"No output received for request {request_id}") + + # Extract the generated text + generated_text = final_request_output.outputs[0].text + + # Create result in BatchedDataDict format + result_batch = BatchedDataDict[GenerationOutputSpec]( + {"texts": [generated_text]} + ) + + return 
(prompt_idx, result_batch) + + # Create tasks for all prompts and yield results as they complete + prompt_tasks = [ + asyncio.create_task(process_single_prompt(i)) for i in range(batch_size) + ] + + # Yield results as they become available + for completed_task in asyncio.as_completed(prompt_tasks): + try: + result = await completed_task + yield result + except Exception as e: + # Cancel remaining tasks + for task in prompt_tasks: + if not task.done(): + task.cancel() + await asyncio.gather(*prompt_tasks, return_exceptions=True) + raise e + + async def report_device_id_async(self) -> list[str]: + """Async version of report_device_id.""" + assert self.llm is not None, ( + "Attempting to report device id with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "report_device_id_async can only be used with async_engine=True. Use report_device_id instead." + ) + + result_or_coro = await self.llm.collective_rpc("report_device_id", args=tuple()) + + if asyncio.iscoroutine(result_or_coro): + list_of_worker_results = await result_or_coro + else: + list_of_worker_results = result_or_coro + + return cast(list[str], list_of_worker_results) + + async def prepare_refit_info_async(self, state_dict_info: dict[str, Any]) -> None: + """Async version of prepare_refit_info.""" + await self.llm.collective_rpc("prepare_refit_info", args=(state_dict_info,)) + + async def update_weights_via_ipc_zmq_async( + self, + ) -> bool: + """Async version of update_weights_via_ipc_zmq.""" + try: + assert self.llm is not None, ( + "Attempting to update weights with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "update_weights_via_ipc_zmq_async can only be used with async_engine=True. Use update_weights_via_ipc_zmq instead." 
+ ) + + # TODO: switch to update_weights_from_local_ipc_handles for better performance once collectively report_device_id is supported in asyncLLM initialization + result_or_coro = await self.llm.collective_rpc( + "update_weights_via_ipc_zmq", args=tuple() + ) + + if asyncio.iscoroutine(result_or_coro): + worker_results = await result_or_coro + else: + worker_results = result_or_coro + + worker_result = worker_results[0] + + if not worker_result: + print( + f"Error: Worker failed to update weights. Result: {worker_result}" + ) + return False + return True + except Exception as e: + print(f"Exception during collective_rpc for weight update: {e}") + import traceback + + traceback.print_exc() + return False + + async def update_weights_from_collective_async(self) -> bool: + """Async version of update_weights_from_collective.""" + try: + assert self.llm is not None, ( + "Attempting to update weights with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "update_weights_from_collective_async can only be used with async_engine=True. Use update_weights_from_collective instead." + ) + + result_or_coro = await self.llm.collective_rpc( + "update_weights_from_collective", args=tuple() + ) + + if asyncio.iscoroutine(result_or_coro): + worker_results = await result_or_coro + else: + worker_results = result_or_coro + + worker_result = worker_results[0] + + if not worker_result: + print( + f"Error: Worker failed to update weights. 
Result: {worker_result}" + ) + return False + return True + except Exception as e: + print(f"Exception during collective_rpc for weight update: {e}") + import traceback + + traceback.print_exc() + return False + + async def reset_prefix_cache_async(self): + """Async version of reset_prefix_cache.""" + assert self.llm is not None, ( + "Attempting to reset prefix cache with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "reset_prefix_cache_async can only be used with async_engine=True. Use reset_prefix_cache instead." + ) + + await self.llm.reset_prefix_cache() + gc.collect() + torch.cuda.empty_cache() + + async def sleep_async(self): + """Async version of sleep.""" + assert self.llm is not None, ( + "Attempting to sleep with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "sleep_async can only be used with async_engine=True. Use sleep instead." + ) + + # Reset the prefix cache to ensure that prefix cache is not reused after weights are updated + await self.llm.reset_prefix_cache() + await self.llm.sleep(level=1) + + gc.collect() + torch.cuda.empty_cache() + + async def wake_up_async(self, **kwargs): + """Async version of wake_up.""" + assert self.llm is not None, ( + "Attempting to wake up with either an uninitialized vLLM or non-model-owner" + ) + + if not self.cfg["vllm_cfg"]["async_engine"]: + raise RuntimeError( + "wake_up_async can only be used with async_engine=True. Use wake_up instead." 
+ ) + + tags = kwargs.get("tags") + + wake_up_args = {} + if tags is not None: + wake_up_args["tags"] = tags + + await self.llm.wake_up(**wake_up_args) + + async def shutdown(self) -> bool: + """Clean up vLLM resources.""" + try: + if self.llm is not None: + # Clean up extension resources (e.g., ZMQ sockets) + await self.llm.collective_rpc("cleanup", args=tuple()) + try: + self.llm.shutdown() + except Exception as e_stop: + print(f"Error calling shutdown_background_loop: {e_stop}") + + # Explicitly delete the engine. This may trigger its __del__ method. + del self.llm + + self.llm = None + self.tokenizer = None + + # Force garbage collection + gc.collect() + torch.cuda.empty_cache() + + if self.server_thread is not None: + from threading import Thread + + from uvicorn import Server + + self.http_server: Server + self.server_thread: Thread + + self.http_server.should_exit = True + self.server_thread.join() + + return True + except Exception as e: + print(f"Error during vLLM shutdown: {e}") + return False diff --git a/nemo_rl/models/generation/vllm_backend.py b/nemo_rl/models/generation/vllm_backend.py deleted file mode 100644 index af7e69d046..0000000000 --- a/nemo_rl/models/generation/vllm_backend.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os -from typing import Any, Iterable, Optional - -import torch - -try: - import vllm # noqa: F401 -except ImportError: - raise ImportError( - "vLLM is not installed. Please check that the py_executable in the runtime_env of VllmGenerationWorker " - "covers the vllm dependency. You may have to update nemo_rl/distributed/ray_actor_environment_registry.py. " - "If you are working interactively, you can install by running `uv sync --extra vllm` anywhere in the repo." - ) - - -def _patch_gemma3_mm(): - """Patch gemma3_mm.py to support new HF multimodal format (post transformers v4.52). - - Patch taken from:https://github.com/vllm-project/vllm/pull/19151/files#diff-5890909300e4e6c3160444e4587ec3fd80498bb83f598b22ce81337f75992b06 - """ - from packaging.version import Version as PkgVersion - - assert PkgVersion(vllm.__version__) < PkgVersion("0.9.2"), ( - f"You are using vllm version {vllm.__version__}. " - "Please remove this patch (_patch_gemma3_mm in nemo_rl/models/generation/vllm_backend.py) " - "since it is included in vllm>=0.9.2." 
- ) - - from vllm.logger import init_logger - from vllm.model_executor.models import gemma3_mm - from vllm.model_executor.models.utils import ( - AutoWeightsLoader, - WeightsMapper, - ) - - logger = init_logger("gemma3_mm_patch") - - gemma3_mm.Gemma3ForConditionalGeneration.hf_to_vllm_mapper = WeightsMapper( - orig_to_new_prefix={ - # mapping for new names in checkpoint saved after transformers v4.52 - "model.language_model.": "language_model.model.", - "model.vision_tower.": "vision_tower.", - "model.multi_modal_projector.": "multi_modal_projector.", - "lm_head.": "language_model.lm_head.", - } - ) - - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: - loader = AutoWeightsLoader(self) - return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) - - gemma3_mm.Gemma3ForConditionalGeneration.load_weights = load_weights - logger.info("Successfully patched gemma3_mm.py in vllm_backend.") - - -_patch_gemma3_mm() - - -class VllmInternalWorkerExtension: - def init_collective( - self, rank_prefix: int, ip: str, port: int, world_size: int - ) -> None: - """Initialize the collective communication.""" - from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator - from vllm.distributed.utils import StatelessProcessGroup - - local_rank = torch.distributed.get_rank() - rank = rank_prefix + local_rank + 1 # 1 is the head node of the train cluster - - # Temporary fix for vllm==0.9.0 which overrides the NCCL_CUMEM_ENABLE to 0 and causes - # https://github.com/NVIDIA-NeMo/RL/issues/564. This can be removed after it is upgraded to vllm>=0.9.1rc1. 
- os.environ["NCCL_CUMEM_ENABLE"] = "1" - - pg = StatelessProcessGroup.create( - host=ip, port=port, rank=rank, world_size=world_size - ) - self.model_update_group = PyNcclCommunicator( # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored - pg, device=self.device - ) - - def report_device_id(self) -> str: - from nemo_rl.utils.nvml import get_device_uuid - - return get_device_uuid(self.device.index) - - def prepare_refit_info( - self, state_dict_info: Optional[dict[str, Any]] = None - ) -> None: - """Prepare the info for refit. - - DtensorPolicyWorker: - colocated inference: state_dict_info is None - non-colocated inference: state_dict_info is a dict of {tensor_name: (shape, dtype)} - - MegatronPolicyWorker: - colocated inference: state_dict_info is a dict of {tensor_name: (shape, dtype, numel)} - non-colocated inference: not implemented yet - """ - self.state_dict_info = state_dict_info # pyrefly: ignore[implicitly-defined-attribute] This class does not define __init__ so assignments like this should be ignored - - def update_weights_from_global_ipc_handles(self, global_device_ipc_handles): - """Update weights from global IPC handles. - - Args: - global_device_ipc_handles (dict): Dictionary mapping device UUIDs to parameter IPC handles. - - Returns: - bool: True if weights were successfully updated. - """ - device_uuid = self.report_device_id() - local_device_ipc_handles = global_device_ipc_handles[device_uuid] - return self.update_weights_from_local_ipc_handles(local_device_ipc_handles) - - def update_weights_from_local_ipc_handles(self, local_device_ipc_handles): - """Update weights from local IPC handles. - - Args: - local_device_ipc_handles (dict): parameter IPC handles for local device. - - Returns: - bool: True if weights were successfully updated. 
- """ - try: - is_tensor_packed = local_device_ipc_handles[0] - if is_tensor_packed: - _, all_handles, tensor_metadata = local_device_ipc_handles - else: - _, name_and_handle_list = local_device_ipc_handles - - device_id = self.device.index - weights = [] - - if is_tensor_packed: - assert self.state_dict_info is not None, ( - "state_dict_info is not prepared. " - "Please call prepare_refit_info when initializing the worker." - ) - - # Extract packed tensor from IPC handle - dtype_to_packed_tensor = {} - for dtype, tensor_handle in all_handles: - func, args = tensor_handle - list_args = list(args) - list_args[6] = device_id - tensor = func(*list_args) - dtype_to_packed_tensor[dtype] = tensor - - # Unpack tensor to weights. Here we only return a view of the tensor to avoid - # using extra memory. - for key, metadata in tensor_metadata.items(): - # dtype for the 1st and 2nd steps may be different (e.g. e_score_correction_bias) - if isinstance(metadata, tuple): - # use dtype of current step - offset, dtype = metadata - shape, _, size = self.state_dict_info[key] - # update record - self.state_dict_info[key] = (shape, dtype, size) - else: - offset = metadata - shape, dtype, size = self.state_dict_info[key] - tensor = dtype_to_packed_tensor[dtype][offset : offset + size].view( - *shape - ) - weights.append((key, tensor)) - else: - # Process each handle to get the tensor - for name, handle in name_and_handle_list: - func, args = handle - list_args = list(args) - list_args[6] = device_id - tensor = func(*list_args) - weights.append((name, tensor)) - - # Load weights into the model - self.model_runner.model.load_weights(weights=weights) - return True - except Exception as e: - print( - f"Error in VllmInternalWorkerExtension.update_weights_from_ipc_handles: {e}" - ) - return False - - def update_weights_from_collective(self) -> bool: - """Update the model weights from collective communication.""" - assert self.state_dict_info is not None, ( - "state_dict_info is not prepared. 
" - "Please call prepare_refit_info when initializing the worker." - ) - - try: - for name, (shape, dtype) in self.state_dict_info.items(): - weight = torch.empty(shape, dtype=dtype, device="cuda") - self.model_update_group.broadcast(weight, src=0) - self.model_runner.model.load_weights(weights=[(name, weight)]) - except Exception as e: - print( - f"Error in VllmInternalWorkerExtension.update_weights_from_collective: {e}" - ) - return False - - return True diff --git a/nemo_rl/models/huggingface/common.py b/nemo_rl/models/huggingface/common.py index c057f6d89a..ad26e36327 100644 --- a/nemo_rl/models/huggingface/common.py +++ b/nemo_rl/models/huggingface/common.py @@ -39,22 +39,16 @@ class ModelFlag(Enum): configuration in different parts of the NeMo RL codebase. Flags: - SKIP_DTENSOR_TIED_WEIGHTS_CHECK: Models that should skip the tied weights check - for the DTensor Policy even without setting the - NRL_SKIP_TIED_WEIGHT_CHECK flag. VLLM_LOAD_FORMAT_AUTO: Models that should use the "auto" load format when initializing VLLM. Each flag has a `matches` method that determines if the flag applies to a given model_name. 
""" - SKIP_DTENSOR_TIED_WEIGHTS_CHECK = auto() VLLM_LOAD_FORMAT_AUTO = auto() def matches(self, model_name: str) -> bool: match self: - case ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK: - return is_gemma_model(model_name) case ModelFlag.VLLM_LOAD_FORMAT_AUTO: return is_gemma_model(model_name) case _: diff --git a/nemo_rl/models/megatron/common.py b/nemo_rl/models/megatron/common.py index bc0d499f08..e56855b410 100644 --- a/nemo_rl/models/megatron/common.py +++ b/nemo_rl/models/megatron/common.py @@ -16,6 +16,7 @@ import torch import torch.distributed as dist +from megatron.bridge.training.state import GlobalState from megatron.core.models.gpt import GPTModel from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.parallel_state import ( @@ -26,7 +27,6 @@ get_tensor_model_parallel_rank, ) from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron.state import GlobalState from nemo_rl.algorithms.loss_functions import LossFunction, SequencePackingLossWrapper from nemo_rl.distributed.batched_data_dict import BatchedDataDict @@ -260,6 +260,7 @@ def forward_step_arbitrary_loss( pad_individual_seqs_to_multiple_of: int = 1, pad_full_seq_to: Optional[int] = None, cp_normalize: bool = True, + policy_cfg: Optional[dict] = None, ): """Forward training step with support for packed sequences and context parallelism. 
@@ -273,6 +274,7 @@ def forward_step_arbitrary_loss( pack_sequences (bool): Whether to pack sequences for efficiency seq_length_key (Optional[str]): Key in data_dict containing actual sequence lengths cp_normalize (bool): Whether to normalize the loss by the cp_size + policy_cfg (Optional[dict]): Policy configuration containing generation parameters Notes on packed sequences with context parallelism (CP): - When CP > 1, each sequence is padded to a multiple of (cp_size * 2) @@ -331,17 +333,44 @@ def forward_step_arbitrary_loss( else: input_ids_cp_sharded = input_ids attention_mask, _, position_ids = get_ltor_masks_and_position_ids( - input_ids, 0, False, False, False + data=input_ids, + eod_token=0, # used for loss_mask, which we don't use + pad_token=0, # used for loss_mask, which we don't use + reset_position_ids=False, + reset_attention_mask=False, + eod_mask_loss=False, + pad_mask_loss=False, ) + multimodal_data = data_dict.get_multimodal_dict( + as_tensors=True, device=input_ids_cp_sharded.device + ) + if len(multimodal_data) > 0: + position_ids = None + + additional_kwargs = {} + # Mamba models currently do not support packed_seq_params + if packed_seq_params is not None: + additional_kwargs["packed_seq_params"] = packed_seq_params + with straggler_timer: output_tensor = model( - input_ids_cp_sharded, - position_ids, - attention_mask, - packed_seq_params=packed_seq_params, + input_ids=input_ids_cp_sharded, + position_ids=position_ids, + attention_mask=attention_mask, + **additional_kwargs, + **multimodal_data, ) + # Apply temperature scaling to logits for training + # This matches the dtensor worker's _apply_temperature_scaling in the train method + if ( + policy_cfg is not None + and "generation" in policy_cfg + and policy_cfg["generation"] is not None + ): + output_tensor.div_(policy_cfg["generation"]["temperature"]) + # Unpack the output tensor if we did packed sequences if pack_sequences and packed_seq_params is not None: # remove padding diff --git 
a/nemo_rl/models/megatron/community_import.py b/nemo_rl/models/megatron/community_import.py index fdaf2f3690..271cda579c 100644 --- a/nemo_rl/models/megatron/community_import.py +++ b/nemo_rl/models/megatron/community_import.py @@ -13,50 +13,87 @@ # limitations under the License. import os +from typing import Any, Optional -from transformers import AutoConfig +from megatron.bridge import AutoBridge +from nemo_rl.models.policy import MegatronConfig -def import_model_from_hf_name(hf_model_name: str, output_path: str): - hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) - if hf_config.model_type == "llama": - from nemo.tron.converter.llama import HFLlamaImporter - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFLlamaImporter( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type == "qwen2": - from nemo.tron.converter.qwen import HFQwen2Importer +def import_model_from_hf_name( + hf_model_name: str, + output_path: str, + megatron_config: Optional[MegatronConfig] = None, + **config_overrides: Any, +): + """Import a Hugging Face model into Megatron checkpoint format and save the Megatron checkpoint to the output path. + + Args: + hf_model_name: Hugging Face model ID or local path (e.g., 'meta-llama/Llama-3.1-8B-Instruct'). + output_path: Directory to write the Megatron checkpoint (e.g., /tmp/megatron_ckpt). + megatron_config: Optional megatron config with paralellism settings for distributed megatron model import. 
+ """ + bridge = AutoBridge.from_hf_pretrained( + hf_model_name, trust_remote_code=True, **config_overrides + ) - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFQwen2Importer( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type in ("qwen3", "qwen3_moe"): - from nemo.tron.converter.qwen import HFQwen3Importer + model_provider = bridge.to_megatron_provider(load_weights=True) - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFQwen3Importer( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type in ("deepseek_v2", "deepseek_v3"): - from nemo.tron.converter.deepseek import HFDeepSeekImporter + # Keep track of defaults so can restore them to the config after loading the model + orig_tensor_model_parallel_size = model_provider.tensor_model_parallel_size + orig_pipeline_model_parallel_size = model_provider.pipeline_model_parallel_size + orig_context_parallel_size = model_provider.context_parallel_size + orig_expert_model_parallel_size = model_provider.expert_model_parallel_size + orig_expert_tensor_parallel_size = model_provider.expert_tensor_parallel_size + orig_num_layers_in_first_pipeline_stage = ( + model_provider.num_layers_in_first_pipeline_stage + ) + orig_num_layers_in_last_pipeline_stage = ( + model_provider.num_layers_in_last_pipeline_stage + ) + orig_pipeline_dtype = model_provider.pipeline_dtype + + if megatron_config is not None: + model_provider.tensor_model_parallel_size = megatron_config[ + "tensor_model_parallel_size" + ] + model_provider.pipeline_model_parallel_size = megatron_config[ + "pipeline_model_parallel_size" + ] + model_provider.context_parallel_size = megatron_config["context_parallel_size"] + model_provider.expert_model_parallel_size = megatron_config[ + "expert_model_parallel_size" + ] + model_provider.expert_tensor_parallel_size = megatron_config[ + "expert_tensor_parallel_size" + ] + model_provider.num_layers_in_first_pipeline_stage = 
megatron_config[ + "num_layers_in_first_pipeline_stage" + ] + model_provider.num_layers_in_last_pipeline_stage = megatron_config[ + "num_layers_in_last_pipeline_stage" + ] + model_provider.pipeline_dtype = megatron_config["pipeline_dtype"] + model_provider.sequence_parallel = megatron_config["sequence_parallel"] + model_provider.finalize() + model_provider.initialize_model_parallel(seed=0) + megatron_model = model_provider.provide_distributed_model(wrap_with_ddp=False) + + # The above parallelism settings are used to load the model in a distributed manner. + # However, we do not want to save the parallelism settings to the checkpoint config + # because they may result in validation errors when loading the checkpoint. + config = megatron_model[0].config + config.tensor_model_parallel_size = orig_tensor_model_parallel_size + config.pipeline_model_parallel_size = orig_pipeline_model_parallel_size + config.context_parallel_size = orig_context_parallel_size + config.expert_model_parallel_size = orig_expert_model_parallel_size + config.expert_tensor_parallel_size = orig_expert_tensor_parallel_size + config.num_layers_in_first_pipeline_stage = orig_num_layers_in_first_pipeline_stage + config.num_layers_in_last_pipeline_stage = orig_num_layers_in_last_pipeline_stage + config.pipeline_dtype = orig_pipeline_dtype + + bridge.save_megatron_model(megatron_model, output_path) - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFDeepSeekImporter( - hf_model_name, - output_path=output_path, - ) - else: - raise ValueError( - f"Unknown model type: {hf_config.model_type}. Currently, DeepSeek, Qwen and Llama are supported. " - "If you'd like to run with a different model, please raise an issue or consider adding your own converter." 
- ) - importer.apply() # resetting mcore state import megatron.core.rerun_state_machine @@ -69,34 +106,39 @@ def export_model_from_megatron( output_path: str, hf_tokenizer_path: str, overwrite: bool = False, + hf_overrides: Optional[dict[str, Any]] = {}, ): if os.path.exists(output_path) and not overwrite: raise FileExistsError( f"HF checkpoint already exists at {output_path}. Delete it to run or set overwrite=True." ) - hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) + try: + from megatron.bridge.training.model_load_save import ( + temporary_distributed_context, + ) + except ImportError: + raise ImportError("megatron.bridge.training is not available.") + + bridge = AutoBridge.from_hf_pretrained( + hf_model_name, trust_remote_code=True, **hf_overrides + ) - if hf_config.model_type == "llama": - from nemo.tron.converter.llama import HFLlamaExporter + # Export performs on CPU with proper distributed context + with temporary_distributed_context(backend="gloo"): + # Need to set model parallel cuda manual seed for mamba mixer + from megatron.core.tensor_parallel import model_parallel_cuda_manual_seed - exporter_cls = HFLlamaExporter - elif hf_config.model_type == "qwen2": - from nemo.tron.converter.qwen import HFQwen2Exporter + model_parallel_cuda_manual_seed(0) - exporter_cls = HFQwen2Exporter - else: - raise ValueError( - f"Unknown model: {hf_model_name}. Currently, only Qwen2 and Llama are supported. " - "If you'd like to run with a different model, please raise an issue or consider adding your own converter." 
+ # Load the Megatron model + megatron_model = bridge.load_megatron_model( + input_path, skip_temp_dist_context=True ) - print(f"Exporting model {hf_model_name} to {output_path}...") - exporter = exporter_cls( - input_path=input_path, - output_path=output_path, - hf_tokenizer_path=hf_tokenizer_path, - ) - exporter.apply() + + # Save in HuggingFace format + bridge.save_hf_pretrained(megatron_model, output_path) + # resetting mcore state import megatron.core.rerun_state_machine diff --git a/nemo_rl/models/megatron/converters/common.py b/nemo_rl/models/megatron/converters/common.py deleted file mode 100644 index 92a4177608..0000000000 --- a/nemo_rl/models/megatron/converters/common.py +++ /dev/null @@ -1,523 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import re -from collections import defaultdict -from typing import Any - -import einops -import numpy as np -import torch -from megatron.core import parallel_state -from nemo.lightning.io.state import ( - StateDictTransform, - TransformCTX, - _match_keys, - _ModelState, -) -from transformers import AutoConfig, AutoModelForCausalLM -from transformers.integrations.accelerate import init_empty_weights - -import nemo_rl.models.megatron.converters.deepseek as deepseek_converter -import nemo_rl.models.megatron.converters.llama as llama_converter -import nemo_rl.models.megatron.converters.qwen2 as qwen2_converter -import nemo_rl.models.megatron.converters.qwen3 as qwen3_converter - -_GROUP_TO_RANKS_CACHE = {} - - -def get_local_layer_num(s): - """Assumes layer number is preceeded by 'layers.'.""" - segments = s.split(".") - number = None - for i, segment in enumerate(segments): - if segment == "layers": - if segments[i + 1].isdigit(): - number = int(segments[i + 1]) - break - return number - - -def get_local_expert_num(s): - """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'.""" - segments = s.split(".") - if "experts" not in segments or segments[-1] == "_extra_state": - return None - number = int(segments[-1].strip("weight")) - return number - - -def get_global_layer_num(s, cfg) -> int: - """Assumes layer number is preceeded by 'layers.'. - - Assumes pipeline model parallel size is set. - In the state dict, the layer number is the local layer number (PP local). - This function converts the local layer number to the global layer number. 
- """ - local_layer_num = get_local_layer_num(s) - assert local_layer_num is not None, f"Local layer number is None for {s}" - pp_rank = parallel_state.get_pipeline_model_parallel_rank() - pp_size = parallel_state.get_pipeline_model_parallel_world_size() - - first_stage_layers = cfg.num_layers_in_first_pipeline_stage - last_stage_layers = cfg.num_layers_in_last_pipeline_stage - - if first_stage_layers is None and last_stage_layers is None: - first_stage_layers = last_stage_layers = cfg.num_layers // pp_size - elif first_stage_layers is None: - first_stage_layers = (cfg.num_layers - last_stage_layers) // (pp_size - 1) - elif last_stage_layers is None: - last_stage_layers = (cfg.num_layers - first_stage_layers) // (pp_size - 1) - - # Calculate global offset based on rank - if pp_rank == 0: - global_offset = 0 - elif pp_rank == pp_size - 1: - global_offset = cfg.num_layers - last_stage_layers - else: - middle_layers = cfg.num_layers - first_stage_layers - last_stage_layers - layers_per_middle_stage = middle_layers // (pp_size - 2) - global_offset = first_stage_layers + (pp_rank - 1) * layers_per_middle_stage - - return global_offset + local_layer_num - - -def get_global_expert_num(s, cfg): - """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'. - - Assumes expert model parallel size is set. - In the state dict, the expert number is the local expert number (expert local). - This function converts the local expert number to the global expert number. 
- """ - local_expert_num = get_local_expert_num(s) - global_expert_num = ( - parallel_state.get_expert_model_parallel_rank() - * cfg.num_moe_experts - // parallel_state.get_expert_model_parallel_world_size() - + local_expert_num - ) - return global_expert_num - - -def get_global_key_from_local_key(local_key, model_cfg): - local_layer = get_local_layer_num(local_key) - if local_layer is not None: - global_layer = get_global_layer_num(local_key, model_cfg) - # Replace the first occurrence of the digits after "layers." with the global layer number. - global_key = re.sub(r"(?<=layers\.)\d+", str(global_layer), local_key, count=1) - else: - global_key = local_key - local_expert = get_local_expert_num(global_key) - if local_expert is not None: - global_expert = get_global_expert_num(global_key, model_cfg) - # Replace the last occurrence of the digits after "weight" with the global expert number. - global_key = re.sub(r"(?<=weight)\d+", str(global_expert), global_key) - return global_key - - -def split_fc1_tp(ctx: TransformCTX, linear_fc1: torch.Tensor): - # gate proj and up proj are mixed right now, and we need to reshape them - # [ gate_tp0 ] [ gate_tp0 ] - # [ up_tp0 ] --\ [ gate_tp1 ] --\ (split gate) - # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) - # [ up_tp1 ] [ up_tp1 ] - megatron_config = ctx.source.config - tp = megatron_config.tensor_model_parallel_size - linear_fc1 = einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=tp) - mlp_gate_proj_weight = linear_fc1[0] - mlp_up_proj_weight = linear_fc1[1] - return mlp_gate_proj_weight, mlp_up_proj_weight - - -def split_fc1_etp(ctx: TransformCTX, linear_fc1: torch.Tensor): - # gate proj and up proj are mixed right now, and we need to reshape them - # [ gate_tp0 ] [ gate_tp0 ] - # [ up_tp0 ] --\ [ gate_tp1 ] --\ (split gate) - # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) - # [ up_tp1 ] [ up_tp1 ] - megatron_config = ctx.source.config - etp = megatron_config.expert_tensor_parallel_size - linear_fc1 = 
einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=etp) - mlp_gate_proj_weight = linear_fc1[0] - mlp_up_proj_weight = linear_fc1[1] - return mlp_gate_proj_weight, mlp_up_proj_weight - - -def split_qkv_gpu(ctx: TransformCTX, linear_qkv: torch.Tensor): - """Split interleave-concatenated qkv to q, k, v. - - Example: export layer linear_qkv to HF {q|k|v}_proj - """ - megatron_config = ctx.source.config - - head_num = megatron_config.num_attention_heads - num_query_groups = megatron_config.num_query_groups - heads_per_group = head_num // num_query_groups - # hidden_size = megatron_config.hidden_size - head_size = megatron_config.kv_channels - qkv_total_dim = head_num + 2 * num_query_groups - - linear_qkv = linear_qkv.reshape([qkv_total_dim, head_size, -1]) - # when converting base model (linear_qkv), hidden size = megatron_config.hidden_size - # when converting lora (linear_qkv.adapter.linear_out), hidden size = lora_r - hidden_size = linear_qkv.size(-1) - q_slice = torch.cat( - [ - torch.arange( - (heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group - ) - for i in range(num_query_groups) - ] - ) - k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) - v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) - - q_proj = linear_qkv[q_slice].reshape(-1, hidden_size) - k_proj = linear_qkv[k_slice].reshape(-1, hidden_size) - v_proj = linear_qkv[v_slice].reshape(-1, hidden_size) - - return q_proj, k_proj, v_proj - - -def split_qkv_bias_gpu(ctx: TransformCTX, qkv_bias: torch.Tensor): - """Split interleave-concatenated qkv bias to separate q, k, v bias. 
- - Example: export layer linear_qkv bias to HF {q|k|v}_proj bias - """ - megatron_config = ctx.source.config - - head_num = megatron_config.num_attention_heads - num_query_groups = megatron_config.num_query_groups - heads_per_group = head_num // num_query_groups - head_size = megatron_config.kv_channels - qkv_total_dim = head_num + 2 * num_query_groups - - qkv_bias = qkv_bias.reshape([qkv_total_dim, head_size]) - q_slice = torch.cat( - [ - torch.arange( - (heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group - ) - for i in range(num_query_groups) - ] - ) - k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) - v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) - - q_bias = qkv_bias[q_slice].reshape(-1) - k_bias = qkv_bias[k_slice].reshape(-1) - v_bias = qkv_bias[v_slice].reshape(-1) - - return q_bias, k_bias, v_bias - - -def update_transforms_for_nemorl(export_transforms): - # In place update - for transform in export_transforms: - if transform.transform.__name__ == "split_fc1": - if ( - "experts" in transform.source_key - and "shared_experts" not in transform.source_key - ): - transform.transform = split_fc1_etp - else: - transform.transform = split_fc1_tp - elif transform.transform.__name__ == "split_qkv": - # This transform previously moved qkv weights to cpu - transform.transform = split_qkv_gpu - elif transform.transform.__name__ == "split_qkv_bias": - # This transform previously moved qkv weights to cpu - transform.transform = split_qkv_bias_gpu - return export_transforms - - -class MegatronToHFConverter: - def __init__(self, hf_model_name, megatron_model): - # We only care about the state_dict keys and the config, so we - # don't need to load the model weights - config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) - with init_empty_weights(): - self.target_model = AutoModelForCausalLM.from_config( - config, trust_remote_code=True - ) - - local_keys = 
list(megatron_model.state_dict().keys()) - global_keys = [ - get_global_key_from_local_key(k, megatron_model.config) for k in local_keys - ] - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_gathered_global_keys = [None] * pp_world_size - torch.distributed.all_gather_object( - pp_gathered_global_keys, global_keys, group=pp_group - ) - pp_gathered_global_keys = list({k for l in pp_gathered_global_keys for k in l}) # type: ignore - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = parallel_state.get_expert_model_parallel_world_size() - ep_gathered_global_keys = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_global_keys, pp_gathered_global_keys, group=ep_group - ) - ep_gathered_global_keys = list({k for l in ep_gathered_global_keys for k in l}) - - global_keys = ep_gathered_global_keys - global_keys_map = {k: None for k in global_keys} - - if config.model_type == "qwen2": - self.export_mapping = qwen2_converter.get_export_mapping(megatron_model) - self.export_transforms = qwen2_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type in ("qwen3", "qwen3_moe"): - self.export_mapping = qwen3_converter.get_export_mapping(config) - self.export_transforms = qwen3_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type == "llama": - self.export_mapping = llama_converter.get_export_mapping() - self.export_transforms = llama_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, 
Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type in ("deepseek_v2", "deepseek_v3"): - self.export_mapping = deepseek_converter.get_export_mapping( - source=global_keys_map, - source_config=megatron_model.config.__dict__, - ) - self.export_transforms = deepseek_converter.get_export_transforms() - self.get_source_fn = deepseek_converter.get_source_fn - else: - raise ValueError( - f"No converter mapping and transforms found for {hf_model_name} with model_type {config.model_type}" - ) - - self.export_transforms = update_transforms_for_nemorl(self.export_transforms) - - updated_global_keys_map = self.get_source_fn( - global_keys_map, megatron_model.config.__dict__ - ).state_dict() - - # Set the value of the state_dict to the megatron key name so that - # StateDictTransform will set the value of the target state dict to - # the megatron key name - dummy_source = _ModelState({k: k for k in updated_global_keys_map.keys()}) - - ctx = TransformCTX( - source=dummy_source, - source_state=dummy_source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(), - ) - for key, val in self.export_mapping.items(): - ctx = StateDictTransform(key, val)(ctx) - - for transform in self.export_transforms: - if type(transform.target_key) == tuple: - for t in transform.target_key: - ctx = StateDictTransform(transform.source_key, t)(ctx) - else: - ctx = StateDictTransform(transform.source_key, transform.target_key)( - ctx - ) - - hf_keys_to_megatron_keys = ctx.target_state - megatron_keys_to_hf_keys = defaultdict(set) - for hf_key, megatron_key in hf_keys_to_megatron_keys.items(): - if isinstance(megatron_key, list): - for k in megatron_key: - megatron_keys_to_hf_keys[k].add(hf_key) - else: - megatron_keys_to_hf_keys[megatron_key].add(hf_key) - self.megatron_keys_to_hf_keys = dict(megatron_keys_to_hf_keys) - - def _get_empty_state_dict(self, source_keys=None): - if source_keys is None: 
- # If source_keys is None, then we use all the target model keys - target_keys = self.target_model.state_dict().keys() - else: - # Otherwise, we only use the target keys corresponding to the source_keys - target_keys = set() - for k in source_keys: - target_keys = target_keys.union(self.megatron_keys_to_hf_keys[k]) - - state_dict = {k: None for k in target_keys} - return state_dict - - def _group( - self, - state_dict, - key, - item, - main_state_dict_keys, - main_items, - exception_state_dict_keys_list, - exception_items, - ): - source_matches = _match_keys(list(state_dict.keys()), key) - if source_matches.size == 1 and source_matches == np.array(None): - # no match, don't include these keys - return - elif source_matches.ndim == 1: - # normal case - main_state_dict_keys.extend(source_matches) - main_items.append(item) - elif source_matches.ndim == 2: - for source_match in source_matches: - if None in source_match: - # partial wildcard match case (e.g. an MoE layer with missing experts in this batch) - non_none_sources = [s for s in source_match if s is not None] - exception_state_dict_keys_list.append(non_none_sources) - exception_items.append(item) - else: - # normal case - main_state_dict_keys.extend(source_match) - main_items.append(item) - else: - raise NotImplementedError( - f"source_matches.ndim = {source_matches.ndim}. Expressions with more than 2 wildcard expressions are not supported." - ) - - def _get_groups(self, state_dict): - """This function is used to group mappings and transforms together. - - Goes through the mappings and transforms once to collect mapping and transform groups - [(mapping, state_dict_keys)], [(transforms, state_dict_keys)] that can be converted - together. - - This is necessary because: - 1. If the mapping or transform expression has 2 wildcard expressions, - _match_keys assumes the matches for each wildcard are the same size. 
For example, - if the mapping is "layers.*.mlp.experts.*.linear_fc1.weight", where the first wildcard - matches the layer number and the second wildcard matches the expert number, it assumes - the number of experts is the same for each layer. This will fail in the case we're doing - batched streaming refit and the current state dict is missing experts from some layers. - To handle this, we separate out the partial keys (e.g. the ones corresponding to less experts) - in a separate group and run them through the mapping and transforms separately. - - NOTE: this function currently only handles expressions with up to 2 wildcard expressions - and will fail if the mapping or transform expression has more than 2 wildcard expressions. - - 2. An expression matches 0 keys in the current state dict. This can happen during batched - streaming refit if the current state dict doesn't have any keys that match the expression. - To handle this, we skip these mapping/transforms. - - """ - # Most of the keys will be able to converted together (main) - # For the keys that can't be converted together (exception), we need to handle them separately - main_state_dict_keys: list[str] = [] - exception_mappings_state_dict_keys_list: list[list[str]] = [] - exception_transforms_state_dict_keys_list: list[list[str]] = [] - - main_mappings: list[tuple[str, Any]] = [] - exception_mappings: list[tuple[str, Any]] = [] - for key, val in self.export_mapping.items(): - self._group( - state_dict, - key, - (key, val), - main_state_dict_keys, - main_mappings, - exception_mappings_state_dict_keys_list, - exception_mappings, - ) - - main_transforms = [] - exception_transforms = [] - for transform in self.export_transforms: - if type(transform.source_key) == tuple: - source_keys = transform.source_key - else: - source_keys = (transform.source_key,) - for source_key in source_keys: - self._group( - state_dict, - source_key, - transform, - main_state_dict_keys, - main_transforms, - 
exception_transforms_state_dict_keys_list, - exception_transforms, - ) - - mapping_groups = [({k: v for k, v in main_mappings}, main_state_dict_keys)] - for (k, v), exception_state_dict_keys in zip( - exception_mappings, exception_mappings_state_dict_keys_list - ): - mapping_groups.append(({k: v}, exception_state_dict_keys)) - transform_groups = [(main_transforms, main_state_dict_keys)] - for exception_transform, exception_state_dict_keys in zip( - exception_transforms, exception_transforms_state_dict_keys_list - ): - transform_groups.append(([exception_transform], exception_state_dict_keys)) - - return mapping_groups, transform_groups - - def convert(self, state_dict, megatron_config): - state_dict = self.get_source_fn( - state_dict, megatron_config.__dict__ - ).state_dict() - - mapping_groups, transform_groups = self._get_groups(state_dict) - - converted_state_dict = {} - for mapping, state_dict_keys in mapping_groups: - source = _ModelState({k: state_dict[k] for k in state_dict_keys}) - source.config = megatron_config - ctx = TransformCTX( - source=source, - source_state=source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(list(state_dict_keys)), - ) - - for key, val in mapping.items(): - ctx = StateDictTransform(key, val)(ctx) - - for k, v in ctx.target_state.items(): - if v is not None: - converted_state_dict[k] = v - - for transforms, state_dict_keys in transform_groups: - source = _ModelState({k: state_dict[k] for k in state_dict_keys}) - source.config = megatron_config - ctx = TransformCTX( - source=source, - source_state=source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(list(state_dict_keys)), - ) - for transform in transforms: - ctx = transform(ctx) - - for k, v in ctx.target_state.items(): - if v is not None: - converted_state_dict[k] = v - - return converted_state_dict diff --git a/nemo_rl/models/megatron/converters/deepseek.py b/nemo_rl/models/megatron/converters/deepseek.py 
deleted file mode 100644 index 512e4a8dbe..0000000000 --- a/nemo_rl/models/megatron/converters/deepseek.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns, _ModelState - - -def get_export_mapping(source, source_config): - mapping = { - # Embed - "embedding.word_embeddings.weight": "model.embed_tokens.weight", - # Attention - "decoder.layers.*.input_layernorm.weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.self_attention.linear_q_down_proj.weight": "model.layers.*.self_attn.q_a_proj.weight", - "decoder.layers.*.self_attention.linear_q_up_proj.weight": "model.layers.*.self_attn.q_b_proj.weight", - "decoder.layers.*.self_attention.linear_kv_down_proj.weight": "model.layers.*.self_attn.kv_a_proj_with_mqa.weight", - "decoder.layers.*.self_attention.linear_kv_up_proj.weight": "model.layers.*.self_attn.kv_b_proj.weight", - "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight": "model.layers.*.self_attn.q_a_layernorm.weight", - "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight": "model.layers.*.self_attn.kv_a_layernorm.weight", - "decoder.layers.*.pre_mlp_layernorm.weight": "model.layers.*.post_attention_layernorm.weight", - 
# Dense MLP - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - # MoE - "decoder.layers.*.mlp.router.weight": "model.layers.*.mlp.gate.weight", - "decoder.layers.*.mlp.experts.linear_fc2.weight*": "model.layers.*.mlp.experts.*.down_proj.weight", - "decoder.layers.*.mlp.shared_experts.linear_fc2.weight": "model.layers.*.mlp.shared_experts.down_proj.weight", - # LM Head - "decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", - } - # For lite model - if source_config["q_lora_rank"] is None: - del mapping["decoder.layers.*.self_attention.linear_q_down_proj.weight"] - del mapping["decoder.layers.*.self_attention.linear_q_up_proj.weight"] - mapping["decoder.layers.*.self_attention.linear_q_proj.weight"] = ( - "model.layers.*.self_attn.q_proj.weight" - ) - # Account for Mcore local spec - if ( - source_config["q_lora_rank"] is not None - and "decoder.layers.0.self_attention.q_layernorm.weight" in source - ): - mapping["decoder.layers.*.self_attention.q_layernorm.weight"] = mapping.pop( - "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight" - ) - - if "decoder.layers.0.self_attention.kv_layernorm.weight" in source: - mapping["decoder.layers.*.self_attention.kv_layernorm.weight"] = mapping.pop( - "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight" - ) - - if source_config.get("moe_router_enable_expert_bias", False): - mapping.update( - { - "decoder.layers.*.mlp.router.expert_bias": "model.layers.*.mlp.gate.e_score_correction_bias", - } - ) - return mapping - - -def get_export_transforms(): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.experts.linear_fc1.weight*", - target_key=( - "model.layers.*.mlp.experts.*.gate_proj.weight", - 
"model.layers.*.mlp.experts.*.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.shared_experts.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.shared_experts.gate_proj.weight", - "model.layers.*.mlp.shared_experts.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - ] - return transforms - - -def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] -) -> _ModelState: - """Modify source state_dict before conversion. - - In deepseek, HF weight `model.layers.*.post_attention_layernorm.weight` is mapped to mcore weight - a) `decoder.layers.*.mlp.linear_fc1.layer_norm_weight`, if the layer is dense - b) `decoder.layers.*.pre_mlp_layernorm.weight`, if the layer is MoE - - We rename decoder.layers.*.mlp.linear_fc1.layer_norm_weight in the first case to unify key names - """ - for layer_i in range(source_config["num_layers"]): - if ( - f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" - in source_state_dict - ): - weight = source_state_dict.pop( - f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" - ) - source_state_dict[f"decoder.layers.{layer_i}.pre_mlp_layernorm.weight"] = ( - weight - ) - modified_source = _ModelState(source_state_dict) - return modified_source diff --git a/nemo_rl/models/megatron/converters/llama.py b/nemo_rl/models/megatron/converters/llama.py deleted file mode 100644 index 101378f86e..0000000000 --- a/nemo_rl/models/megatron/converters/llama.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(): - mapping = { - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - return mapping - - -def get_export_transforms(hf_config): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.weight", - target_key=( - "model.layers.*.self_attn.q_proj.weight", - "model.layers.*.self_attn.k_proj.weight", - "model.layers.*.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - - if not hf_config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - fn=TransformFns.prune_padding, - ) - ) - - return transforms diff --git a/nemo_rl/models/megatron/converters/qwen2.py 
b/nemo_rl/models/megatron/converters/qwen2.py deleted file mode 100644 index 92fbf84e88..0000000000 --- a/nemo_rl/models/megatron/converters/qwen2.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(source): - mapping = { - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - return mapping - - -def get_export_transforms(hf_config): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.weight", - target_key=( - "model.layers.*.self_attn.q_proj.weight", - "model.layers.*.self_attn.k_proj.weight", - "model.layers.*.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.bias", - target_key=( - "model.layers.*.self_attn.q_proj.bias", - "model.layers.*.self_attn.k_proj.bias", - "model.layers.*.self_attn.v_proj.bias", - ), - 
fn=TransformFns.split_qkv_bias, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - - if not hf_config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - fn=TransformFns.prune_padding, - ), - ) - - return transforms diff --git a/nemo_rl/models/megatron/converters/qwen3.py b/nemo_rl/models/megatron/converters/qwen3.py deleted file mode 100644 index 1dcb278106..0000000000 --- a/nemo_rl/models/megatron/converters/qwen3.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(config): - mapping = { - "**.self_attention.linear_proj.weight": "**.self_attn.o_proj.weight", - "**.self_attention.linear_qkv.layer_norm_weight": "**.input_layernorm.weight", - "**.self_attention.q_layernorm.weight": "**.self_attn.q_norm.weight", - "**.self_attention.k_layernorm.weight": "**.self_attn.k_norm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - is_moe = getattr(config, "num_experts", 0) > 0 - if is_moe: - mapping.update( - { - "**.mlp.experts.linear_fc2.weight*": "**.mlp.experts.*.down_proj.weight", - "**.mlp.router.weight": "**.mlp.gate.weight", - "**.pre_mlp_layernorm.weight": "**.post_attention_layernorm.weight", - } - ) - else: - mapping.update( - { - "**.mlp.linear_fc2.weight": "**.mlp.down_proj.weight", - "**.mlp.linear_fc1.layer_norm_weight": "**.post_attention_layernorm.weight", - } - ) - return mapping - - -def get_export_transforms(config): - is_moe = getattr(config, "num_experts", 0) > 0 - transforms = [ - io.state_transform( - source_key="**.self_attention.linear_qkv.weight", - target_key=( - "**.self_attn.q_proj.weight", - "**.self_attn.k_proj.weight", - "**.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - ( - io.state_transform( - source_key="**.mlp.linear_fc1.weight", - target_key=("**.mlp.gate_proj.weight", "**.mlp.up_proj.weight"), - fn=TransformFns.split_fc1, - ) - if not is_moe - else io.state_transform( - source_key="**.mlp.experts.linear_fc1.weight*", - target_key=( - "**.mlp.experts.*.gate_proj.weight", - "**.mlp.experts.*.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ) - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - if not config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - 
fn=TransformFns.prune_padding, - ) - ) - - return transforms diff --git a/nemo_rl/models/megatron/refit_utils.py b/nemo_rl/models/megatron/refit_utils.py deleted file mode 100644 index 2463d6ae4f..0000000000 --- a/nemo_rl/models/megatron/refit_utils.py +++ /dev/null @@ -1,332 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import re -import time -from typing import Any, List, Tuple - -import torch -from megatron.core import parallel_state -from megatron.core.extensions.transformer_engine import ( - TEColumnParallelGroupedLinear, - TEColumnParallelLinear, - TERowParallelGroupedLinear, - TERowParallelLinear, -) -from megatron.core.tensor_parallel.layers import ( - ColumnParallelLinear, - RowParallelLinear, - VocabParallelEmbedding, -) -from torch.distributed import get_process_group_ranks - -from nemo_rl.models.megatron.converters.common import get_global_key_from_local_key - - -def get_tp_dim(model, param_name, named_modules_dict): - # pass in named_modules_dict so we can get it ahead of time instead - # of once for each param - pattern = re.compile(r"\.(?:weight|bias)\d*$") - if not pattern.search(param_name): - return None - - prefix = "" - if hasattr(model, "module"): - prefix = "module." - if hasattr(model.module, "module"): - prefix = "module.module." 
- key = prefix + ".".join(param_name.split(".")[:-1]) - module = named_modules_dict.get(key) - if module is None: - return None - if hasattr(module, "parallel_mode") and module.parallel_mode is not None: - # TE layers sometimes have parallel_mode we can check directly - if module.parallel_mode == "column": - return 0 - elif module.parallel_mode == "row": - return 1 - else: - return None - elif isinstance( - module, - ( - VocabParallelEmbedding, - ColumnParallelLinear, - TEColumnParallelGroupedLinear, - TEColumnParallelLinear, - ), - ): - return 0 - elif isinstance( - module, (RowParallelLinear, TERowParallelGroupedLinear, TERowParallelLinear) - ): - return 1 - else: - return None - - -@torch.no_grad() -def gather_params(model, keys: list[str], key_to_global_keys: dict[str, list[str]]): - st = time.perf_counter() - - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - etp_group = parallel_state.get_expert_tensor_parallel_group() - etp_world_size = torch.distributed.get_world_size(etp_group) - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - named_modules_dict = dict(model.named_modules()) - state_dict = model.state_dict() - gathered_params = {} - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - - for local_key, owner_pp_local_rank_id, shape, dtype in sorted(keys): - if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: - param = state_dict[local_key] - - tp_dim = get_tp_dim(model, local_key, named_modules_dict) - - # If the parameter is TP-sharded, gather its slices on GPU. 
- if tp_dim is not None: - if ep_pattern.search(local_key): - world_size = etp_world_size - group = etp_group - else: - world_size = tp_world_size - group = tp_group - - gathered_slices = [torch.empty_like(param) for _ in range(world_size)] - torch.distributed.all_gather(gathered_slices, param, group=group) - full_param = torch.cat(gathered_slices, dim=tp_dim) - else: - full_param = param - else: - full_param = torch.empty( - *shape, dtype=dtype, device=torch.cuda.current_device() - ) - - # Broadcast across PP group. - src_global_rank = pp_global_ranks[owner_pp_local_rank_id] - - # Broadcast from the rank that has the parameter - torch.distributed.broadcast(full_param, src=src_global_rank, group=pp_group) - pp_gathered_params = [full_param] - - # gather across EP group - if ep_pattern.search(local_key): - stacked_pp_gathered_params = torch.stack(pp_gathered_params) - - ep_gathered_params = [ - torch.empty( - stacked_pp_gathered_params.shape, - dtype=dtype, - device=torch.cuda.current_device(), - ) - for _ in range(ep_world_size) - ] - torch.distributed.all_gather( - ep_gathered_params, stacked_pp_gathered_params, group=ep_group - ) - flat_gathered_params = [ - x for y in ep_gathered_params for x in torch.unbind(y) - ] - - else: - flat_gathered_params = pp_gathered_params - - flat_gathered_global_keys = key_to_global_keys[ - (local_key, owner_pp_local_rank_id) - ] - for k, p in zip(flat_gathered_global_keys, flat_gathered_params): - if k is not None: - gathered_params[k] = p - - print(f"Time taken to gather params: {time.perf_counter() - st}") - return gathered_params - - -@torch.no_grad() -def get_param_info(model, dtype): - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - tp_group_rank_ids = get_process_group_ranks(tp_group) - - etp_group = parallel_state.get_expert_tensor_parallel_group() - etp_world_size = torch.distributed.get_world_size(etp_group) - 
etp_group_rank_ids = get_process_group_ranks(etp_group) - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_group_rank_ids = get_process_group_ranks(pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - ep_group_rank_ids = get_process_group_ranks(ep_group) - - # Collect parameter info - param_info = [] - - # Dictionary of modules we can quickly look up to check if a module has TP - named_modules_dict = dict(model.named_modules()) - - # Process each parameter in the model - # state_dict includes parameters and persistent buffers - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - for name, param in model.state_dict().items(): - # Skip _extra_state entries (these are metadata, not actual weights) - if "_extra_state" in name: - continue - - use_etp = True if ep_pattern.search(name) else False - if use_etp: - tensor_mp_rank_ids = etp_group_rank_ids - else: - tensor_mp_rank_ids = tp_group_rank_ids - - shape = list(param.shape) - tp_dim = get_tp_dim(model, name, named_modules_dict) - if tp_dim is not None: - tp_rank_ids = tuple(sorted(tensor_mp_rank_ids)) - shape[tp_dim] *= len(tp_rank_ids) - else: - tp_rank_ids = (torch.distributed.get_rank(),) - - pp_rank_ids = tuple(sorted(pp_group_rank_ids)) - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - - if ep_pattern.search(name): - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - else: - ep_rank_ids = (torch.distributed.get_rank(),) - - # Calculate size for this parameter - prec_to_bytes = { - torch.bfloat16: 2, - torch.float16: 2, - torch.float32: 4, - } - scale = prec_to_bytes[dtype] / prec_to_bytes[param.dtype] - size_in_bytes = ( - param.element_size() - * param.numel() - * len(tensor_mp_rank_ids) - * len(ep_rank_ids) - * scale - ) - param_info.append( - ( - ( - name, - 
pp_local_rank_id, - tuple(shape), - param.dtype, - ), - size_in_bytes, - ) - ) - # Gather parameter info from all pipeline parallel ranks to ensure complete coverage - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - - # Gather all parameter info from all PP ranks - pp_gathered_param_infos = [None] * pp_world_size - torch.distributed.all_gather_object( - pp_gathered_param_infos, param_info, group=pp_group - ) - pp_gathered_param_infos = [x for y in pp_gathered_param_infos for x in y] # type: ignore - - # Gather parameter info from all expert parallel ranks to ensure complete coverage - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # Gather all parameter info from all EP ranks - ep_gathered_param_infos = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_param_infos, pp_gathered_param_infos, group=ep_group - ) - all_param_infos = [x for y in ep_gathered_param_infos for x in y] - - # Merge all parameter infos, keeping only unique parameter names - merged_param_info = [] - seen_params = set() - - for name, size in all_param_infos: - if name not in seen_params: - merged_param_info.append((name, size)) - seen_params.add(name) - - # Update param_info with the merged information - param_info = merged_param_info - print(f"Prepared {len(param_info)} tensors for refit") - - return param_info - - -@torch.no_grad() -def get_local_key_to_global_keys(model, state_dict_info: List[Tuple[Any, int]]): - """Get the local key to global keys mapping.""" - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) - 
pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # start calculating the global key - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - state_dict = model.state_dict() - final_key_to_global_keys = {} - - for param_info, size in state_dict_info: - local_key, owner_pp_local_rank_id, _, _ = param_info - - # Step 1: create global key from local key - # if: for if a parameter is sharded along PP or EP; - # else: not sharded (like embedding) - pp_gathered_objs = [None] - if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: - pp_gathered_objs[0] = get_global_key_from_local_key(local_key, model.config) - - # Step 2: gather global keys from ranks in PP group - src_global_rank = pp_global_ranks[owner_pp_local_rank_id] - torch.distributed.broadcast_object_list( - pp_gathered_objs, src=src_global_rank, group=pp_group - ) - - # Step 3: gather global keys from ranks in EP group - if ep_pattern.search(local_key): - ep_gathered_objs = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_objs, pp_gathered_objs, group=ep_group - ) - flat_gathered_objs = [x for y in ep_gathered_objs for x in y] - else: - flat_gathered_objs = pp_gathered_objs - - final_key_to_global_keys[(local_key, owner_pp_local_rank_id)] = ( - flat_gathered_objs - ) - - return final_key_to_global_keys diff --git a/nemo_rl/models/policy/__init__.py b/nemo_rl/models/policy/__init__.py index 59f85db870..bf8599a595 100644 --- a/nemo_rl/models/policy/__init__.py +++ b/nemo_rl/models/policy/__init__.py @@ -12,28 +12,45 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from typing import Any, NotRequired, TypedDict, Union +from typing import Any, Literal, NotRequired, TypedDict, Union from nemo_rl.models.generation.interfaces import GenerationConfig +class DTensorConfigDisabled(TypedDict): + enabled: Literal[False] + + class DTensorConfig(TypedDict): - enabled: bool - cpu_offload: NotRequired[bool] - sequence_parallel: NotRequired[bool] - activation_checkpointing: NotRequired[bool] - tensor_parallel_size: NotRequired[int] - context_parallel_size: NotRequired[int] - custom_parallel_plan: NotRequired[str] + enabled: Literal[True] + env_vars: NotRequired[dict[str, str] | None] + _v2: NotRequired[bool] + cpu_offload: bool + sequence_parallel: bool + activation_checkpointing: bool + tensor_parallel_size: int + context_parallel_size: int + custom_parallel_plan: str | None + clear_cache_every_n_steps: NotRequired[int | None] + + +class SequencePackingConfigDisabled(TypedDict): + enabled: Literal[False] class SequencePackingConfig(TypedDict): - enabled: bool + enabled: Literal[True] train_mb_tokens: int - logprob_mb_tokens: int + # Not required because some algorithms like SFT don't calculate log probs + logprob_mb_tokens: NotRequired[int] algorithm: str +class RewardModelConfig(TypedDict): + enabled: bool + reward_model_type: str + + class MegatronOptimizerConfig(TypedDict): optimizer: str lr: float @@ -52,6 +69,11 @@ class MegatronOptimizerConfig(TypedDict): use_distributed_optimizer: bool use_precision_aware_optimizer: bool clip_grad: float + # knob to enable optimizer cpu offload + optimizer_cpu_offload: bool + # knob to set the fraction of parameters to keep on CPU + # currently if optimizer_cpu_offload is true, this knob must be 1.0 + optimizer_offload_fraction: float class MegatronSchedulerConfig(TypedDict): @@ -59,7 +81,7 @@ class MegatronSchedulerConfig(TypedDict): end_weight_decay: float weight_decay_incr_style: str lr_decay_style: str - lr_decay_iters: int + lr_decay_iters: NotRequired[int | None] lr_warmup_iters: int 
lr_warmup_init: float @@ -68,35 +90,52 @@ class MegatronDDPConfig(TypedDict): grad_reduce_in_fp32: bool overlap_grad_reduce: bool overlap_param_gather: bool - average_in_collective: bool use_custom_fsdp: bool data_parallel_sharding_strategy: str +# Type exists to be lax if not specified +class MegatronConfigDisabled(TypedDict): + enabled: Literal[False] + + class MegatronConfig(TypedDict): - enabled: bool + enabled: Literal[True] + env_vars: NotRequired[dict[str, str] | None] + # 1 is the minimum recommendation for RL since we almost always need to offload before beginning generation. + # Setting to 0 is faster, but you are more likely to run out of GPU memory. In SFT/DPO, the default is 0. empty_unused_memory_level: int activation_checkpointing: bool - converter_type: str tensor_model_parallel_size: int pipeline_model_parallel_size: int - num_layers_in_first_pipeline_stage: int - num_layers_in_last_pipeline_stage: int + num_layers_in_first_pipeline_stage: int | None + num_layers_in_last_pipeline_stage: int | None context_parallel_size: int pipeline_dtype: str sequence_parallel: bool freeze_moe_router: bool expert_tensor_parallel_size: int expert_model_parallel_size: int - - optimizer: NotRequired[MegatronOptimizerConfig] - scheduler: NotRequired[MegatronSchedulerConfig] + # If True, defer the casting of logits to float32 until the backward pass. + # If you are using logprob_chunk_size, you must set this to True. 
+ defer_fp32_logits: NotRequired[bool] + # gives ~20% training perf speedup with sequence packing + apply_rope_fusion: bool + # gives ~25% training perf speedup with sequence packing and apply_rope_fusion + bias_activation_fusion: bool + # Force overwrite of the initial checkpoint even if it exists (default: False) + force_overwrite_initial_ckpt: NotRequired[bool] + + optimizer: MegatronOptimizerConfig + scheduler: MegatronSchedulerConfig distributed_data_parallel_config: MegatronDDPConfig class TokenizerConfig(TypedDict): name: str chat_template: NotRequired[str] + # Arguments to pass to tokenizer.apply_chat_template(...). This can be used to pass kwargs like enable_thinking=true + chat_template_kwargs: NotRequired[dict[str, Any] | None] class PytorchOptimizerConfig(TypedDict): @@ -107,24 +146,29 @@ class PytorchOptimizerConfig(TypedDict): class SinglePytorchSchedulerConfig(TypedDict): name: str kwargs: dict[str, Any] - milestones: NotRequired[list[int]] # Used in SequentialLR configuration + + +class SinglePytorchMilestonesConfig(TypedDict): + milestones: list[int] # Used in SequentialLR configuration SchedulerMilestones = dict[str, list[int]] +class DynamicBatchingConfigDisabled(TypedDict): + enabled: Literal[False] + + class DynamicBatchingConfig(TypedDict): # dynamic_batching improves performance by ensuring logprob and training microbatches # have a sufficent number of tokens to maximize GPU utilization. Specifically, variable length # responses are sorted by sequence length and bucketed into microbatches with a total # amount of tokens is approximately close to 'train_mb_tokens' and 'logprob_mb_tokens' for the # training and logprob stages respectively. 
- enabled: bool - - ## required if enabled is true - train_mb_tokens: NotRequired[int] - logprob_mb_tokens: NotRequired[int] - sequence_length_round: NotRequired[int] + enabled: Literal[True] + train_mb_tokens: int + logprob_mb_tokens: NotRequired[int] # Only used for some algorithms + sequence_length_round: int class PolicyConfig(TypedDict): @@ -133,18 +177,29 @@ class PolicyConfig(TypedDict): train_global_batch_size: int train_micro_batch_size: int logprob_batch_size: NotRequired[int] + # If set, log probability computation is chunked along the sequence dimension to avoid GPU OOM (especially during backward pass). + # Within each chunk loop, logits casting (from float16/bfloat16 to float32) is done to prevent holding the entire float32 logits tensor in memory. + # If None, chunking is disabled and the full sequence is processed at once. + logprob_chunk_size: NotRequired[int | None] generation: NotRequired[GenerationConfig] generation_batch_size: NotRequired[ int ] # used in static batched (framework) generation precision: str - dtensor_cfg: DTensorConfig - megatron_cfg: NotRequired[MegatronConfig] - dynamic_batching: DynamicBatchingConfig - sequence_packing: NotRequired[SequencePackingConfig] + reward_model_cfg: NotRequired[RewardModelConfig] + dtensor_cfg: DTensorConfig | DTensorConfigDisabled + megatron_cfg: NotRequired[MegatronConfig | MegatronConfigDisabled] + hf_config_overrides: NotRequired[dict[str, Any]] + dynamic_batching: DynamicBatchingConfig | DynamicBatchingConfigDisabled + sequence_packing: NotRequired[SequencePackingConfig | SequencePackingConfigDisabled] make_sequence_length_divisible_by: int max_total_sequence_length: int - max_grad_norm: NotRequired[Union[float, int]] + # This sets the clipping norm for the DTensorPolicyWorkers (Megatron's is called clip_grad) + max_grad_norm: NotRequired[float | int | None] refit_buffer_size_gb: NotRequired[float] - optimizer: NotRequired[PytorchOptimizerConfig] - scheduler: 
NotRequired[list[SinglePytorchSchedulerConfig] | SchedulerMilestones] + optimizer: NotRequired[PytorchOptimizerConfig | None] + scheduler: NotRequired[ + list[SinglePytorchSchedulerConfig | SinglePytorchMilestonesConfig] + | SchedulerMilestones + | None + ] diff --git a/nemo_rl/models/policy/dtensor_policy_worker.py b/nemo_rl/models/policy/dtensor_policy_worker.py index b590032408..9b15733e2e 100644 --- a/nemo_rl/models/policy/dtensor_policy_worker.py +++ b/nemo_rl/models/policy/dtensor_policy_worker.py @@ -16,12 +16,14 @@ import gc import itertools import os +import warnings from collections import defaultdict from contextlib import AbstractContextManager, contextmanager, nullcontext from typing import Any, Generator, Iterable, Optional, Set, Union, cast import ray import torch +import zmq from accelerate import init_empty_weights from torch import nn from torch.distributed.checkpoint.state_dict import ( @@ -36,22 +38,29 @@ from torch.distributed.tensor.experimental._attention import ( set_rotate_method, ) -from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer -from transformers.integrations.accelerate import find_tied_parameters +from transformers import ( + AutoConfig, + AutoModelForSequenceClassification, + AutoProcessor, + AutoTokenizer, +) from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM from nemo_rl.algorithms.interfaces import LossFunction, LossType from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.model_utils import ( + allgather_cp_sharded_tensor, + distributed_vocab_topk, + get_logprobs_from_vocab_parallel_logits, +) from nemo_rl.models.dtensor.parallelize import ( _parallelize_model, clip_grad_by_total_norm_, get_grad_norm, - get_logprobs_from_vocab_parallel_logits, to_local_if_dtensor, ) from nemo_rl.models.huggingface.common import ( - ModelFlag, get_flash_attention_kwargs, pack_sequences, ) @@ 
-59,19 +68,22 @@ from nemo_rl.models.policy.interfaces import ( LogprobOutputSpec, ReferenceLogprobOutputSpec, + ScoreOutputSpec, ) from nemo_rl.models.policy.utils import ( - configure_expandable_segments, + configure_dynamo_cache, get_gpu_info, get_runtime_env_for_policy_worker, import_class_from_path, - is_vllm_v1_engine_enabled, + resolve_model_class, sliding_window_overwrite, ) from nemo_rl.utils.native_checkpoint import ( load_checkpoint, save_checkpoint, ) +from nemo_rl.utils.nsys import wrap_with_nvtx_name +from nemo_rl.utils.packed_tensor import packed_broadcast_producer @contextmanager @@ -140,12 +152,20 @@ def __init__( self, config: PolicyConfig, tokenizer: AutoTokenizer, + processor: Optional[AutoProcessor] = None, weights_path: Optional[str] = None, optimizer_path: Optional[str] = None, init_optimizer: bool = True, init_reference_model: bool = True, **kwargs: Any, ): + """Initialize the DTensorPolicyWorker.""" + self.tokenizer = tokenizer + self.processor = processor + self.is_vlm = processor is not None + + print(f"Initializing DTensorPolicyWorker with is_vlm={self.is_vlm}") + self.is_generation_colocated = None if "generation" in config and config["generation"] is not None: self.is_generation_colocated = config["generation"]["colocated"]["enabled"] @@ -155,8 +175,9 @@ def __init__( if not self.is_generation_colocated: os.environ["NCCL_CUMEM_ENABLE"] = "1" - # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) - configure_expandable_segments() + # Disable dynamo autotune_local_cache to avoid crash when there's already a cache + # with different order of node_bundles + configure_dynamo_cache() self.cfg = config # torch distributed init. 
Envars for rank, world_size, and master_addr and master_port are set from the ray remote call @@ -166,6 +187,7 @@ def __init__( model_name = self.cfg["model_name"] self.cpu_offload = self.cfg["dtensor_cfg"]["cpu_offload"] + self.offload_optimizer_for_logprob = self.cfg["offload_optimizer_for_logprob"] self.max_grad_norm = self.cfg["max_grad_norm"] if self.cfg["precision"] == "float32": @@ -180,11 +202,16 @@ def __init__( print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") self.enable_seq_packing = self.cfg["sequence_packing"]["enabled"] if self.enable_seq_packing: + assert not self.is_vlm, ( + "Sequence packing is not supported for VLM models. Please set policy.sequence_packing.enabled = False to train VLM models." + ) print( f"[Rank {self.rank}] Sequence packing is enabled for model {model_name}" ) print(f"[Rank {self.rank}] Using FlashAttention2 for sequence packing") + hf_config_overrides = self.cfg.get("hf_config_overrides", {}) or {} + model_config = AutoConfig.from_pretrained( model_name, # Always load the model in float32 to keep master weights in float32. @@ -197,12 +224,47 @@ def __init__( attn_implementation="flash_attention_2" if self.enable_seq_packing else None, + **hf_config_overrides, ) + # reward model + self._is_reward_model = ( + "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"] + ) + if self._is_reward_model: + # Ensure sequence packing is disabled. + if self.enable_seq_packing: + raise NotImplementedError( + "Sequence packing is not supported for reward models" + ) + # Load model as a Reward Model. + rm_type = self.cfg["reward_model_cfg"]["reward_model_type"] + if rm_type == "bradley_terry": + model_class = AutoModelForSequenceClassification + if model_config.num_labels != 1: + # For Bradley-Terry reward models, the linear head has a single output. 
+ # In the transformers library, the default setting for model_config.num_labels is 2 + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/configuration_utils.py#L259). + # Since num_labels is used as the out_features for the linear head + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/modeling_llama.py#L738) + # if num_labels is not 1, we set it to 1. This change may trigger a warning that some weights are not initialized + # from the model checkpoint and are instead initialized using model_config.initializer_range + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/configuration_llama.py#L62). + print( + "model_config.num_labels is not 1. Setting it to 1 since this value is used as the out_features " + "for the linear head of Bradley-Terry reward models." + ) + model_config.num_labels = 1 + else: + raise ValueError(f"Unknown reward model type: {rm_type}") + else: + # DO NOT assume AutoModelForCausalLM, multimodal models can inherit from AutoModelForImageTextToText, AutoModelForTextToWaveform, etc. + model_class = resolve_model_class(model_config.model_type) + full_state_dict = None if self.rank == 0: print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") - model = AutoModelForCausalLM.from_pretrained( + model = model_class.from_pretrained( model_name, device_map="cpu", # load weights onto CPU initially trust_remote_code=True, @@ -214,23 +276,14 @@ def __init__( print(f"[Rank {self.rank}] Initializing empty model for FSDP...") # All ranks initialize model on meta device, so FSDP can shard it. # The actual weights will be broadcast from rank 0. 
- with init_empty_weights(): - self.model = AutoModelForCausalLM.from_config( + self.model = model_class.from_config( model_config, + trust_remote_code=True, ) - # caching since this property is not always preserved after FSDP - self.num_tied_weights = len(find_tied_parameters(self.model)) - self.skip_tie_check = os.environ.get( - "NRL_SKIP_TIED_WEIGHT_CHECK" - ) or ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK.matches(model_name) - - self.tokenizer = tokenizer - # ------------------------------------------------ - # 3) Move to GPU + Composable FSDP - # (Initialize device mesh, shard submodules, then shard entire model) - # ------------------------------------------------ + if self.model.config.pad_token_id is None: + self.model.config.pad_token_id = tokenizer.pad_token_id tp_size = self.cfg["dtensor_cfg"]["tensor_parallel_size"] cp_size = self.cfg["dtensor_cfg"]["context_parallel_size"] @@ -248,6 +301,10 @@ def __init__( print( "[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. Enable tp_size > 1 to use sequence parallelism." ) + elif sequence_parallel_enabled and tp_size > 1: + raise RuntimeError( + "Sequence parallel + tp_size >1 is currently broken in torch==2.8.0. See https://github.com/NVIDIA-NeMo/Automodel/issues/652 for more details." + ) if cp_size > 1: assert not isinstance(self.model, Gemma3ForCausalLM), ( @@ -261,9 +318,20 @@ def __init__( "See https://github.com/NVIDIA-NeMo/RL/issues/659 for more details." ) + assert not self.is_vlm, ( + "Context parallel is yet not supported for VLM models. Please set cp_size = 1 to train VLM models." + ) + + # torch==2.8 uses LOCAL_RANK to set the device here (https://github.com/pytorch/pytorch/blob/ba56102387ef21a3b04b357e5b183d48f0afefc7/torch/distributed/device_mesh.py#L500), + # but CUDA_VISIBLE_DEVICES is set to only 1 gpu, so we need to temporarily set LOCAL_RANK to 0. 
+ # TODO: consider changing the default LOCAL_RANK set in worker_groups.py + prev_local_rank = os.environ["LOCAL_RANK"] + os.environ["LOCAL_RANK"] = "0" + device_mesh = torch.distributed.device_mesh.init_device_mesh( "cuda", (dp_size, cp_size, tp_size), mesh_dim_names=("dp", "cp", "tp") ) + os.environ["LOCAL_RANK"] = prev_local_rank self.dp_cp_mesh = device_mesh[("dp", "cp")]._flatten(mesh_dim_name="dp_cp") @@ -277,6 +345,10 @@ def __init__( self.cp_size = cp_size self.device_mesh = device_mesh + # ------------------------------------------------ + # 3) Move to GPU + Composable FSDP + # (Initialize device mesh, shard submodules, then shard entire model) + # ------------------------------------------------ self.model = _parallelize_model( self.model, self.dp_cp_mesh, @@ -304,7 +376,7 @@ def __init__( # Handle tied word embeddings after loading the state dict # We need to actually tie the parameters at the model level - is_tied_lm_head = getattr( + is_tied_lm_head = hasattr(self.model, "lm_head") and getattr( getattr(self.model, "config", {}), "tie_word_embeddings", False ) if is_tied_lm_head: @@ -379,15 +451,6 @@ def __init__( "No weights path provided. Starting from scratch (default policy init)" ) - # vars used for refit - ## will be initialized in prepare_refit_info - self.refit_param_info = None - ## used for streaming update inference engine weights - self._held_sharded_state_dict_reference: Optional[dict[str, torch.Tensor]] = ( - None - ) - self._held_streamed_param_reference: Optional[dict[str, torch.Tensor]] = None - # Refer to nemo impl. Below is original comment. 
# based on https://github.com/pytorch/torchtitan/blob/main/torchtitan/distributed/utils.py#L113 @staticmethod @@ -421,13 +484,8 @@ def create_context_parallel_ctx( # based on https://github.com/pytorch/torchtitan/blob/cddd7dc809f36fe0ed51cdaaea0671c084d75442/torchtitan/distributed/utils.py#L178 def _apply_temperature_scaling(self, logits: torch.Tensor) -> torch.Tensor: - # Apply temperature scaling to logits if configured and not using V1 engine. if "generation" in self.cfg and self.cfg["generation"] is not None: - # The V1 engine returns raw logits before temperature scaling. - # The V0 engine returns scaled logits. - # Therefore, we only divide if we are not using the V1 engine. - if not is_vllm_v1_engine_enabled(): - logits.div_(self.cfg["generation"]["temperature"]) + logits.div_(self.cfg["generation"]["temperature"]) return logits @staticmethod @@ -451,17 +509,18 @@ def train_context(cp_context: Optional[Generator[None, None, None]] = None): yield - def init_collective(self, ip: str, port: int, world_size: int) -> None: + def init_collective( + self, ip: str, port: int, world_size: int, *, train_world_size: int + ) -> None: """Initialize the collective communication.""" from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator from vllm.distributed.utils import StatelessProcessGroup - if self.rank == 0: - pg = StatelessProcessGroup.create( - host=ip, port=port, rank=0, world_size=world_size - ) - device = torch.cuda.current_device() - self.model_update_group = PyNcclCommunicator(pg, device=device) + pg = StatelessProcessGroup.create( + host=ip, port=port, rank=self.rank, world_size=world_size + ) + device = torch.cuda.current_device() + self.model_update_group = PyNcclCommunicator(pg, device=device) def is_alive(self) -> bool: return True @@ -473,6 +532,7 @@ def get_gpu_info(self) -> dict[str, Any]: """Return information about the GPU being used by this worker.""" return get_gpu_info(self.model) + 
@wrap_with_nvtx_name("dtensor_policy_worker/train") def train( self, data: BatchedDataDict[Any], @@ -482,15 +542,6 @@ def train( mbs: Optional[int] = None, ) -> dict[str, Any]: """Train the policy on a batch of data with a given loss function.""" - # Check if the model has tied weights - if ( - self.num_tied_weights != 0 - and self.cfg["dtensor_cfg"]["tensor_parallel_size"] > 1 - and not self.skip_tie_check - ): - raise ValueError( - f"Using dtensor policy with tp size {self.cfg['dtensor_cfg']['tensor_parallel_size']} for model ({self.cfg['model_name']}) that has tied weights (num_tied_weights={self.num_tied_weights}) is not supported (https://github.com/NVIDIA-NeMo/RL/issues/227). Please use dtensor policy with tensor parallel == 1 instead." - ) if gbs is None: gbs = self.cfg["train_global_batch_size"] if mbs is None: @@ -593,9 +644,21 @@ def train( mb_iterator = batch.make_microbatch_iterator(mbs) iterator_len = batch.size // mbs + empty_cache_steps = self.cfg.get("dtensor_cfg", {}).get( + "clear_cache_every_n_steps" + ) + if empty_cache_steps: + warnings.warn( + f"Emptying cache every {empty_cache_steps} microbatches, doing so unnnecessarily would incur a large performance overhead." 
+ ) + for mb_idx, mb in enumerate( itertools.chain(mb_iterator, dummy_iterator) ): + # Conditioanlly empty cache when sensitive to fragmentation + if empty_cache_steps and mb_idx % empty_cache_steps == 0: + torch.cuda.empty_cache() + with torch.autocast(device_type="cuda", dtype=self.dtype): if self.enable_seq_packing: input_ids = mb.get("input_ids").cuda() @@ -623,7 +686,7 @@ def train( attention_mask = torch.ones( (batch_size, seq_len), - dtype=torch.long, + dtype=torch.bool, device=input_ids.device, ) position_ids = torch.arange( @@ -631,8 +694,21 @@ def train( ).repeat(batch_size, 1) flash_attn_kwargs = {} + # add vlm kwargs to model call + vlm_kwargs = mb.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) + if len(vlm_kwargs) > 0: + position_ids = None + assert not self.cfg["dtensor_cfg"]["sequence_parallel"], ( + "Sequence parallel is not supported with multimodal since there's an issue when you do not pass position_ids. See https://github.com/NVIDIA-NeMo/Automodel/issues/652" + ) + context_parallel_ctx = None if self.cp_size > 1: + assert len(vlm_kwargs) == 0, ( + f"multimodal kwargs={vlm_kwargs} are not supported for context parallel" + ) seq_index = torch.arange( seq_len, device=input_ids.device ).repeat(1, 1) @@ -652,19 +728,33 @@ def train( with DTensorPolicyWorker.train_context(context_parallel_ctx): with torch.autocast(device_type="cuda", dtype=self.dtype): - outputs = self.model( + model_args = dict( input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, use_cache=False, flash_attn_kwargs=flash_attn_kwargs, + **vlm_kwargs, ) + if self._is_reward_model: + # `flash_attn_kwarg` is not supported for `LlamaForSequenceClassification`. + # Note that it should be empty anyway since sequence packing + # is not supported for reward models. 
+ assert not flash_attn_kwargs + del model_args["flash_attn_kwargs"] + # remove flash_attn_kwargs if there are multimodal kwargs + if len(vlm_kwargs) > 0: + del model_args["flash_attn_kwargs"] + + outputs = self.model(**model_args) + # Get logprobs if not hasattr(outputs, "logits"): logits = self.model.lm_head(outputs.last_hidden_state) else: logits = outputs.logits + del outputs # Apply temperature scaling logits = self._apply_temperature_scaling(logits) @@ -724,13 +814,13 @@ def train( ) else: loss_fn_ = loss_fn - loss, loss_metrics = loss_fn_( logits, mb, global_valid_seqs, global_valid_toks, ) + del logits # skip the update for dummy batches if mb_idx < iterator_len: @@ -774,7 +864,6 @@ def train( self.model.parameters(), max_grad_norm=self.max_grad_norm, total_norm=grad_norm, - dtype=torch.float32, ) grad_norm = torch.tensor([grad_norm]) @@ -783,6 +872,8 @@ def train( losses.append(torch.tensor(mb_losses).sum().item()) + # release gradient memory before rollouts + self.optimizer.zero_grad() # increment scheduler after all batches in rollout are processed if not eval_mode: self.scheduler.step() @@ -806,11 +897,15 @@ def train( "global_loss": global_loss.cpu(), "grad_norm": grad_norm, "rank": torch.distributed.get_rank(), + "gpu_name": torch.cuda.get_device_name(), + "model_dtype": self.dtype, "all_mb_metrics": dict(mb_metrics), } return metrics + # TODO @Rayen Tian: Related Issue: Refactor shared logic between score() and get_logprobs() (https://github.com/NVIDIA-NeMo/RL/issues/1094) + @wrap_with_nvtx_name("dtensor_policy_worker/get_logprobs") def get_logprobs( self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[LogprobOutputSpec]: @@ -831,6 +926,7 @@ def get_logprobs( if micro_batch_size is not None else self.cfg["logprob_batch_size"] ) + logprob_chunk_size = self.cfg.get("logprob_chunk_size", None) # dim 1 is always assumed to be the sequence dim, sanity check this here sequence_dim = 1 @@ -878,9 +974,15 @@ def 
get_logprobs( step += 1 input_ids = lp_batch.get("input_ids").cuda() input_lengths = lp_batch.get("input_lengths") + vlm_kwargs = lp_batch.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) batch_size, seq_len = input_ids.shape if self.enable_seq_packing: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are not supported for sequence packing" + ) input_ids, position_ids, _ = pack_sequences( input_ids=input_ids, input_lengths=input_lengths, @@ -896,13 +998,13 @@ def get_logprobs( input_lengths=input_lengths, ) else: - # Create attention mask for right-padded data - attention_mask = torch.zeros( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device + # Create post_attention_mask for right-padded data for masking token after forwarding. + post_attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.bool, device=input_ids.device ) for i, length in enumerate(input_lengths): # For right-padded sequence, set 1s at the beginning of the sequence - attention_mask[i, :length] = 1 + post_attention_mask[i, :length] = 1 # explicitly create position ids for the input, otherwise the sharding # for DTensor will be incorrect @@ -911,17 +1013,25 @@ def get_logprobs( ).repeat(batch_size, 1) flash_attn_kwargs = {} - with torch.autocast(device_type="cuda", dtype=self.dtype): # DTensor requires the casual attention kernel to hit, # yet our attention mask above is not always all 1s # this is fine because we mask with the actual attention mask # later, but for input it has to be all 1s - attention_mask_input_all_ones = torch.ones( - (batch_size, seq_len), dtype=torch.long, device=input_ids.device + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.bool, + device=input_ids.device, ) + # if there are multimodal kwargs, we don't need to add position_ids (computed internally) + if len(vlm_kwargs) > 0: + position_ids = None + context_parallel_ctx = None if self.cp_size > 1: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are 
not supported for context parallel" + ) seq_index = torch.arange(seq_len, device=input_ids.device).repeat( 1, 1 ) @@ -937,13 +1047,18 @@ def get_logprobs( with DTensorPolicyWorker.train_context(context_parallel_ctx): with torch.autocast(device_type="cuda", dtype=self.dtype): - outputs = self.model( + model_args = dict( input_ids=input_ids, - attention_mask=attention_mask_input_all_ones, + attention_mask=attention_mask, position_ids=position_ids, use_cache=False, flash_attn_kwargs=flash_attn_kwargs, + **vlm_kwargs, ) + if len(vlm_kwargs) > 0: + del model_args["flash_attn_kwargs"] + + outputs = self.model(**model_args) logits = outputs.logits @@ -988,18 +1103,47 @@ def get_logprobs( ) token_logprobs = get_logprobs_from_vocab_parallel_logits( - logits.to(torch.float32), + logits, input_ids_dtensor, seq_index_tensor, + chunk_size=logprob_chunk_size, ) assert token_logprobs.shape[1] == seq_len - 1 else: if isinstance(logits, DTensor): token_logprobs = get_logprobs_from_vocab_parallel_logits( - logits.to(torch.float32), input_ids + logits, + input_ids, + chunk_size=logprob_chunk_size, ) else: + if logprob_chunk_size is not None: + logits_seq_len = int(logits.shape[1]) + num_chunks = ( + logits_seq_len + logprob_chunk_size - 1 + ) // logprob_chunk_size + chunked_log_probs = [] + for chunk_idx in range(num_chunks): + chunk_start = chunk_idx * logprob_chunk_size + chunk_end = min( + logits_seq_len, + (chunk_idx + 1) * logprob_chunk_size, + ) + chunk_logits = logits[ + :, chunk_start:chunk_end, : + ].to(torch.float32) + log_probs = torch.nn.functional.log_softmax( + chunk_logits, dim=-1 + ) + chunked_log_probs.append(log_probs) + log_probs = torch.cat(chunked_log_probs, dim=1) + del chunked_log_probs + else: + logits = logits.to(torch.float32) + log_probs = torch.nn.functional.log_softmax( + logits, dim=-1 + ) # Extract logprobs for each token in the sequence by gathering the logprob # corresponding to the next token at each position # Input shapes: @@ -1007,15 +1151,14 @@ 
def get_logprobs( # token_ids: [batch_size, sequence_length] - actual tokens # Output shape: [batch_size, sequence_length] - logprob of each token given previous # We get logprob of token[t+1] from logits[t], prepending 0 to maintain sequence length - - log_probs = torch.nn.functional.log_softmax( - outputs.logits.to(torch.float32), dim=-1 - ) next_tokens = input_ids[:, 1:] log_probs = log_probs[:, :-1] token_logprobs = log_probs.gather( dim=-1, index=next_tokens.unsqueeze(-1) ).squeeze(-1) + del log_probs + + del outputs, logits token_logprobs = torch.cat( [torch.zeros_like(token_logprobs[:, :1]), token_logprobs], dim=1 @@ -1027,7 +1170,7 @@ def get_logprobs( if not self.enable_seq_packing: # Apply mask to zero out padding tokens logprobs - token_logprobs = token_logprobs * attention_mask + token_logprobs = token_logprobs * post_attention_mask else: # For packed sequences, unpack logprobs unpacked_logprobs = torch.zeros( @@ -1062,6 +1205,432 @@ def get_logprobs( return return_data + # TODO @Rayen Tian: Related Issue: Refactor shared logic between score() and get_logprobs() (https://github.com/NVIDIA-NeMo/RL/issues/1094) + @wrap_with_nvtx_name("dtensor_policy_worker/score") + def score(self, data: BatchedDataDict) -> BatchedDataDict[ScoreOutputSpec]: + global_batch_size = min(self.cfg["batch_size"], data.size) + + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + for k, v in data.items(): + if torch.is_tensor(v) and len(v.shape) > 1: + assert v.shape[sequence_dim] == seq_dim_size, ( + f"Dim 1 must be the sequence dim, expected dim 1={seq_dim_size} but got shape {v.shape}" + ) + self.model.eval() + + with unshard_fsdp2_model(self.model), torch.no_grad(): + data.to("cuda") + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = 
data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = data.make_microbatch_iterator(global_batch_size) + iterator_len = data.size // global_batch_size + + step = 0 + all_rm_scores = [] + for batch_idx, generate_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + step += 1 + input_ids = generate_batch.get("input_ids").cuda() + input_lengths = generate_batch.get("input_lengths") + batch_size, seq_len = input_ids.shape + if self.enable_seq_packing: + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Create attention mask for right-padded data + post_attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.bool, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + # For right-padded sequence, set 1s at the beginning of the sequence + post_attention_mask[i, :length] = 1 + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.bool, + device=input_ids.device, + ) + + context_parallel_ctx = None + if self.cp_size > 1: + seq_index = torch.arange(seq_len, 
device=input_ids.device).repeat( + 1, 1 + ) + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = self.create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + with DTensorPolicyWorker.train_context(context_parallel_ctx): + with torch.autocast(device_type="cuda", dtype=self.dtype): + model_args = dict( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + use_cache=False, + ) + outputs = self.model(**model_args) + + if not hasattr(outputs, "logits"): + logits = self.model.lm_head(outputs.last_hidden_state) + else: + logits = outputs.logits + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + if isinstance(logits, DTensor): + logits = logits.to(torch.float32) + else: + logits = outputs.logits.to(torch.float32) + + rm_scores = to_local_if_dtensor(logits) + rm_scores = rm_scores.squeeze(-1) + all_rm_scores.append(rm_scores) + + all_rm_scores = torch.cat(all_rm_scores, dim=0) + all_rm_scores = all_rm_scores.squeeze(-1).cpu() + return_data = BatchedDataDict[ScoreOutputSpec]( + { + "scores": all_rm_scores, + } + ) + return return_data + + @wrap_with_nvtx_name("dtensor_policy_worker/get_topk_logits") + def get_topk_logits( + self, + data: BatchedDataDict[Any], + k: int, + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[Any]: + """Return per-position top-k logits and corresponding global indices. + + Notes: + - Return shapes are [B, S, k]. + - Computes top-k over the full sequence (no trimming of the last position). + - If alignment with next-token targets is required, the caller should handle it. + - If logits are TP-sharded DTensor, performs distributed global top-k across TP. + - Supports context parallelism with proper CP gather. + - Otherwise, computes local top-k on full-vocab tensor. 
+ """ + topk_batch_size = ( + micro_batch_size + if micro_batch_size is not None + else self.cfg["logprob_batch_size"] + ) + + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + + out_topk_vals = [] + out_topk_idx = [] + self.model.eval() + + with torch.no_grad(): + data.to("cuda") + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + # dynamic batching support (no CP/packed) + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. 
+ dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = data.make_microbatch_iterator(topk_batch_size) + iterator_len = data.size // topk_batch_size + + for batch_idx, lp_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + input_ids = lp_batch.get("input_ids").cuda() + input_lengths = lp_batch.get("input_lengths") + vlm_kwargs = lp_batch.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) + batch_size, seq_len = input_ids.shape + + # Store original shapes for unpacking later + original_batch_size = batch_size + original_seq_len = seq_len + + if self.enable_seq_packing: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are not supported for sequence packing" + ) + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Build attention mask (right-padded inputs) + attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.long, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + attention_mask[i, :length] = 1 + + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + + flash_attn_kwargs = {} + + with torch.autocast(device_type="cuda", dtype=self.dtype): + attention_mask_input_all_ones = torch.ones( + (batch_size, seq_len), dtype=torch.long, device=input_ids.device + ) + + # if there are multimodal kwargs, we don't need to add position_ids (computed internally) + if len(vlm_kwargs) > 0: + position_ids = None + + 
context_parallel_ctx = None + if self.cp_size > 1: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are not supported for context parallel" + ) + seq_index = torch.arange(seq_len, device=input_ids.device).repeat( + 1, 1 + ) + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = self.create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + + with DTensorPolicyWorker.train_context(context_parallel_ctx): + with torch.autocast(device_type="cuda", dtype=self.dtype): + model_args = dict( + input_ids=input_ids, + attention_mask=attention_mask_input_all_ones, + position_ids=position_ids, + use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, + **vlm_kwargs, + ) + if len(vlm_kwargs) > 0: + del model_args["flash_attn_kwargs"] + + outputs = self.model(**model_args) + + if not hasattr(outputs, "logits"): + logits = self.model.lm_head(outputs.last_hidden_state) + else: + logits = outputs.logits + del outputs + + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + + if self.cp_size > 1: + if isinstance(logits, DTensor): + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + + # deal with TP first + local_logits = logits.to_local() # [B, S_cp, V_tp] + + tp_group = self.tp_mesh.get_group() + tp_rank = torch.distributed.get_rank(tp_group) + V_local = int(local_logits.shape[-1]) + vocab_start_index = 
tp_rank * V_local + vocab_end_index = (tp_rank + 1) * V_local + + vals, idx = distributed_vocab_topk( + local_logits, + k=k, + tp_group=tp_group, + vocab_start_index=vocab_start_index, + vocab_end_index=vocab_end_index, + ) + # [B, S_cp, k] + + cp_group = self.cp_mesh.get_group() + + vals = allgather_cp_sharded_tensor( + vals, cp_group, seq_dim=sequence_dim + ) + idx = allgather_cp_sharded_tensor( + idx, cp_group, seq_dim=sequence_dim + ) + # [B, S, k] + else: + # Compute top-k over full sequence length (do not drop last position) + if isinstance(logits, DTensor): + local_logits = logits.to_local() # [B, S, V_local] + tp_group = self.tp_mesh.get_group() + tp_rank = torch.distributed.get_rank(tp_group) + V_local = int(local_logits.shape[-1]) + vocab_start_index = tp_rank * V_local + vocab_end_index = (tp_rank + 1) * V_local + + vals, idx = distributed_vocab_topk( + local_logits, + k=k, + tp_group=tp_group, + vocab_start_index=vocab_start_index, + vocab_end_index=vocab_end_index, + ) + else: + full_logits = logits.to(torch.float32) + vals, idx = torch.topk(full_logits, k=k, dim=-1) + + # Handle sequence packing unpacking + if self.enable_seq_packing: + # Unpack top-k results from packed format back to original batch format + # vals: [1, packed_seq_len, k] -> [original_batch_size, original_seq_len, k] + # idx: [1, packed_seq_len, k] -> [original_batch_size, original_seq_len, k] + + # Create tensors to store unpacked results + unpacked_vals = torch.zeros( + (original_batch_size, original_seq_len, k), + dtype=vals.dtype, + device=vals.device, + ) + unpacked_idx = torch.zeros( + (original_batch_size, original_seq_len, k), + dtype=idx.dtype, + device=idx.device, + ) + + # Get cumulative sequence lengths for unpacking + cu_seqlens = flash_attn_kwargs.cu_seqlens_q + + for i in range(original_batch_size): + start = cu_seqlens[i].item() + end = cu_seqlens[i + 1].item() + seq_len_actual = input_lengths[i].item() + + # Extract the corresponding portion from packed results + # 
Note: vals and idx are [1, packed_seq_len, k] due to packing + unpacked_vals[i, :seq_len_actual, :] = vals[0, start:end, :] + unpacked_idx[i, :seq_len_actual, :] = idx[0, start:end, :] + + # Replace with unpacked results + vals = unpacked_vals + idx = unpacked_idx + + # Update batch_size and seq_len for consistency + batch_size = original_batch_size + seq_len = original_seq_len + + # Keep only real sequence tokens (no trimming here; padded positions can be masked downstream) + # Shapes remain [B, S, k]. + out_topk_vals.append(vals.cpu()) + out_topk_idx.append(idx.cpu()) + + ret = BatchedDataDict[Any]() + # Pad each micro-batch result on sequence dim to common length (S), similar to get_logprobs + all_topk_vals_padded = [] + all_topk_idx_padded = [] + target_seq_len = seq_dim_size + for vals, idx in zip(out_topk_vals, out_topk_idx): + pad_needed = target_seq_len - vals.shape[1] + if pad_needed > 0: + # pad along sequence dimension (second dim): (last_dim_pad_left, last_dim_pad_right, seq_pad_left, seq_pad_right, batch_pad_left, batch_pad_right) + vals = torch.nn.functional.pad( + vals, (0, 0, 0, pad_needed, 0, 0), mode="constant", value=0.0 + ) + idx = torch.nn.functional.pad( + idx, (0, 0, 0, pad_needed, 0, 0), mode="constant", value=0 + ) + all_topk_vals_padded.append(vals) + all_topk_idx_padded.append(idx) + + ret["topk_logits"] = ( + torch.cat(all_topk_vals_padded, dim=0) + if len(all_topk_vals_padded) > 1 + else all_topk_vals_padded[0] + ).cpu() + ret["topk_indices"] = ( + torch.cat(all_topk_idx_padded, dim=0) + if len(all_topk_idx_padded) > 1 + else all_topk_idx_padded[0] + ).cpu() + return ret + @contextmanager def use_reference_model(self) -> Generator[None, None, None]: """Context manager that temporarily swaps the reference model and active model. 
@@ -1091,6 +1660,7 @@ def use_reference_model(self) -> Generator[None, None, None]: val = to_local_if_dtensor(v) val.copy_(curr_state_dict[k]) + @wrap_with_nvtx_name("dtensor_policy_worker/get_reference_policy_logprobs") def get_reference_policy_logprobs( self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: @@ -1120,6 +1690,14 @@ def _add_noise_to_weights(self) -> None: def return_state_dict(self): return self.model.state_dict() + def return_model_config(self) -> dict[str, Any]: + """Return the model configuration as a dictionary. + + Returns: + dict: Model configuration dictionary + """ + return self.model.config + def report_device_id(self) -> str: """Report the UUID of the current CUDA device using NVML. @@ -1133,117 +1711,127 @@ def report_device_id(self) -> str: # Get device UUID using NVML return get_device_uuid(device_idx) - @torch.no_grad() - def prepare_refit_info(self) -> Optional[dict[str, Any]]: - state_dict = self.model.state_dict() - - if self.is_generation_colocated: - # Collect info for streaming multiple tensors - self.refit_param_info = [] - for name, tensor in state_dict.items(): - # dtensor's numel will return complete tensor instead of only local tensor - size_in_bytes = tensor.element_size() * tensor.numel() - self.refit_param_info.append((name, size_in_bytes)) - - else: - # Collect info for collective communication - state_dict_info = {} - for name, tensor in state_dict.items(): - state_dict_info[name] = (tensor.shape, self.dtype) - - return state_dict_info + def get_zmq_address(self): + """Get the ZMQ address for the current device.""" + return f"ipc:///tmp/{self.report_device_id()}.sock" + + def maybe_init_zmq(self): + """Initialize the ZMQ socket if it doesn't exist.""" + if not hasattr(self, "zmq_socket"): + self.zmq_context = zmq.Context() + self.zmq_socket = self.zmq_context.socket(zmq.REQ) + self.zmq_socket.setsockopt( + zmq.SNDTIMEO, 120000 + ) # set timeout to 120 
seconds + self.zmq_socket.setsockopt( + zmq.RCVTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt(zmq.LINGER, 0) + self.zmq_socket.bind(self.get_zmq_address()) @torch.no_grad() - def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: - """Prepare the weights for IPC. + def prepare_refit_info(self) -> Optional[dict[str, Any]]: + """Prepare state dict metadata for weight refitting and IPC streaming.""" + state_dict_info = {} + for name, tensor in self.model.state_dict().items(): + # all tensor will be casted to self.dtype in stream_weights_via_ipc_zmq/broadcast_weights_for_collective + state_dict_info[name] = (tensor.shape, self.dtype) - This function: - - Prepares the state_dict of the model. - - Collects the info for streaming multiple tensors. + return state_dict_info - Returns: - list: The list of parameters sizes. - float: The total available memory in bytes. - """ + def get_free_memory_bytes(self) -> int: + """Get the available free memory.""" from nemo_rl.utils.nvml import get_free_memory_bytes - # Get state_dict - self.model = self.move_to_cuda(self.model) - self._held_sharded_state_dict_reference: dict[str, torch.Tensor] = ( - self.model.state_dict() - ) - - # Collect current available memory for refit - ## Get current device index from torch device_idx = torch.cuda.current_device() - ## Get device free memory using NVML - total_available_bytes = get_free_memory_bytes(device_idx) - ## Use 80% of the free memory for safety - memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.8") - total_available_bytes *= float(memory_ratio) - - return self.refit_param_info, total_available_bytes + return get_free_memory_bytes(device_idx) @torch.no_grad() - def get_weights_ipc_handles(self, keys: Iterable[str]) -> dict[str, Any]: - from torch.multiprocessing.reductions import reduce_tensor - - assert self._held_sharded_state_dict_reference is not None, ( - "prepare_weights_for_ipc must be called before 
get_weights_ipc_handles" + @wrap_with_nvtx_name("dtensor_policy_worker/stream_weights_via_ipc_zmq") + def stream_weights_via_ipc_zmq(self, buffer_size_bytes: int = 0) -> None: + """Stream model weights to peer process via ZMQ IPC socket.""" + self.maybe_init_zmq() + # Manually move model to cuda for cpu offload case + if self.cpu_offload: + self.model = self.move_to_cuda(self.model) + + from nemo_rl.models.policy.utils import stream_weights_via_ipc_zmq_impl + + def dtensor_params_generator(): + """Generator that yields (name, tensor) pairs, converting DTensors to local tensors.""" + for name, tensor in self.model.state_dict().items(): + if isinstance(tensor, DTensor): + # Convert DTensor to full tensor for streaming + full_tensor = tensor.full_tensor() + # Convert to target dtype + yield ( + name, + full_tensor.to(self.dtype, non_blocking=True).contiguous(), + ) + else: + # Convert to target dtype + yield name, tensor.to(self.dtype, non_blocking=True).contiguous() + + # Use the shared implementation + stream_weights_via_ipc_zmq_impl( + params_generator=dtensor_params_generator(), + buffer_size_bytes=buffer_size_bytes, + zmq_socket=self.zmq_socket, + rank=self.rank, + worker_name=str(self), ) - # Clean up the held tensors to reduce peak memory - if self._held_streamed_param_reference is not None: - del self._held_streamed_param_reference - self._held_streamed_param_reference = None - - converted_params = {} - for key in keys: - # Get full_tensor for dtensor (GPU > 1) - tensor = self._held_sharded_state_dict_reference[key] - if isinstance(tensor, DTensor): - full_tensor = tensor.full_tensor() - else: - full_tensor = tensor - # Convert parameters to the configured dtype - converted_params[key] = full_tensor.to(self.dtype, non_blocking=True) - - # Temporary record the full tensor for cleanup - # It is needed for cleanup the last full_tensor in the refit process - self._held_streamed_param_reference = converted_params - - # Get device UUID for IPC - device_uuid = 
self.report_device_id() - # Create handles for the tensors - all_handles = [] - for key, p in converted_params.items(): - handle = reduce_tensor(p.detach()) - all_handles.append((key, handle)) - - # (pack_tensor_for_ipc: bool, handles: list) - serialized = (False, all_handles) - - return {device_uuid: serialized} - @torch.no_grad() def broadcast_weights_for_collective(self) -> None: """Broadcast the weights for collective communication.""" - for _, tensor in self.model.state_dict().items(): + # Manually move model to cuda for cpu offload case + if self.cpu_offload: + print( + "[WARNING]: Unless you are lacking of memory, it is not recommended to enable cpu_offload when " + "using non-colocated generation since it will have an extra onload and offload at refit stage." + ) + self.model = self.move_to_cuda(self.model) + + def _dtensor_post_iter_func(tensor, dtype): if isinstance(tensor, DTensor): tensor = tensor.full_tensor() - if self.rank == 0: - tensor = tensor.to(self.dtype, non_blocking=True) - self.model_update_group.broadcast(tensor.data, src=0) + tensor = tensor.to(dtype, non_blocking=True) + return tensor + # param_iterator will return (name, tensor), we only need tensor + dtensor_post_iter_func = lambda x: _dtensor_post_iter_func(x[1], self.dtype) + + packed_broadcast_producer( + iterator=iter(self.model.state_dict().items()), + group=self.model_update_group, + src=0, + post_iter_func=dtensor_post_iter_func, + ) + + # Manually move model to cpu for cpu offload case + # cpu offload needs model on CPU before model forward + if self.cpu_offload: + self.model = self.move_to_cpu(self.model) + + @wrap_with_nvtx_name("dtensor_policy_worker/prepare_for_lp_inference") def prepare_for_lp_inference(self) -> None: + # onload model to cuda if not self.cpu_offload: self.move_to_cuda(self.model) else: self.model = self.move_buffer_to_device(self.model, "cuda") self.model.eval() - self.offload_before_refit() + # offload optimizer to cpu + torch.randn(1).cuda() # wake up 
torch allocator + if self.optimizer is not None and self.offload_optimizer_for_logprob: + self.move_optimizer_to_device("cpu") + + gc.collect() + torch.cuda.empty_cache() + + @wrap_with_nvtx_name("dtensor_policy_worker/prepare_for_training") def prepare_for_training(self, *args, **kwargs) -> None: # onload models and optimizer state to cuda if not self.cpu_offload: @@ -1253,55 +1841,38 @@ def prepare_for_training(self, *args, **kwargs) -> None: # to cuda automatically, so we need to do that manually self.model = self.move_buffer_to_device(self.model, "cuda") - # have to move buffers to cuda manually for cpu offload case - self.move_buffer_to_device(self.model, "cuda") - self.model.train() # Move optimizer state to CUDA if it exists + # colocated generation will always offload optimizer to cuda before refit if ( - hasattr(self, "optimizer") - and self.optimizer is not None + self.optimizer is not None and not self.cpu_offload + and (self.offload_optimizer_for_logprob or self.is_generation_colocated) ): - for state in self.optimizer.state.values(): - for k, v in state.items(): - if isinstance(v, (DTensor, torch.Tensor)): - state[k] = v.to("cuda") + self.move_optimizer_to_device("cuda") torch.cuda.empty_cache() @torch.no_grad() + @wrap_with_nvtx_name("dtensor_policy_worker/offload_before_refit") def offload_before_refit(self) -> None: """Offload the optimizer to the CPU.""" torch.randn(1).cuda() # wake up torch allocator - if hasattr(self, "optimizer") and self.optimizer is not None: - for state in self.optimizer.state.values(): - for k, v in state.items(): - if isinstance(v, (DTensor, torch.Tensor)): - state[k] = v.to("cpu") + if self.optimizer is not None: + self.move_optimizer_to_device("cpu") gc.collect() torch.cuda.empty_cache() @torch.no_grad() + @wrap_with_nvtx_name("dtensor_policy_worker/offload_after_refit") def offload_after_refit(self) -> None: - # Offload as much as possible on the CPU + """Offload as much as possible on the CPU.""" self.model = 
self.move_to_cpu(self.model) self.model.eval() torch.randn(1).cuda() # wake up torch allocator self.offload_before_refit() # rerun the old offload function - # Clean up the held tensors - if self._held_sharded_state_dict_reference is not None: - del self._held_sharded_state_dict_reference - self._held_sharded_state_dict_reference = None - if self._held_streamed_param_reference is not None: - del self._held_streamed_param_reference - self._held_streamed_param_reference = None - - gc.collect() - torch.cuda.empty_cache() - # Print memory stats after offloading allocated = torch.cuda.memory_allocated() / (1024**3) # Convert to GB reserved = torch.cuda.memory_reserved() / (1024**3) # Convert to GB @@ -1309,6 +1880,12 @@ def offload_after_refit(self) -> None: f"GPU Memory after optimizer offload: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved" ) + def move_optimizer_to_device(self, device: str | torch.device) -> None: + for state in self.optimizer.state.values(): + for k, v in state.items(): + if isinstance(v, (DTensor, torch.Tensor)): + state[k] = v.to(device) + def move_to_device(self, model: nn.Module, device: str | torch.device) -> nn.Module: model = self.move_buffer_to_device(model, device) return model.to(device) @@ -1368,6 +1945,10 @@ def load_checkpoint( def shutdown(self) -> None: """Shutdown the policy.""" + # Clean up extension resources like ZMQ sockets + if hasattr(self, "zmq_socket"): + self.zmq_socket.close() + self.zmq_context.term() def start_gpu_profiling(self) -> None: """Start GPU profiling.""" @@ -1376,3 +1957,9 @@ def start_gpu_profiling(self) -> None: def stop_gpu_profiling(self) -> None: """Stop GPU profiling.""" torch.cuda.profiler.stop() + + def report_node_ip_and_gpu_id(self) -> list[tuple[str, int]]: + """Report the node IP and GPU ID of the current worker.""" + ip = ray._private.services.get_node_ip_address() + gpu_id = ray.get_gpu_ids()[0] + return (ip, gpu_id) diff --git a/nemo_rl/models/policy/dtensor_policy_worker_v2.py 
b/nemo_rl/models/policy/dtensor_policy_worker_v2.py new file mode 100644 index 0000000000..d1691b22ef --- /dev/null +++ b/nemo_rl/models/policy/dtensor_policy_worker_v2.py @@ -0,0 +1,1949 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import itertools +import os +import warnings +from collections import defaultdict +from contextlib import AbstractContextManager, contextmanager, nullcontext +from typing import Any, Generator, Optional, cast + +import ray +import torch +import zmq +from accelerate import init_empty_weights +from nemo_automodel import ( + NeMoAutoModelForSequenceClassification, +) +from nemo_automodel.components._transformers.utils import ( + sliding_window_overwrite, +) +from nemo_automodel.components.distributed.cp_utils import ( + create_context_parallel_ctx, + get_train_context, +) +from nemo_automodel.components.distributed.grad_utils import ( + clip_grad_by_total_norm_, + get_grad_norm, +) +from nemo_automodel.components.distributed.parallelizer import ( + fsdp2_strategy_parallelize, +) +from nemo_automodel.components.distributed.tensor_utils import ( + get_cpu_state_dict, + to_local_if_dtensor, +) +from torch import nn +from torch.distributed.checkpoint.state_dict import ( + StateDictOptions, + set_model_state_dict, +) +from torch.distributed.fsdp import ( + CPUOffloadPolicy, + MixedPrecisionPolicy, + OffloadPolicy, +) +from torch.distributed.tensor import DTensor, 
Shard +from transformers import ( + AutoConfig, + AutoProcessor, + AutoTokenizer, +) +from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM + +from nemo_rl.algorithms.interfaces import LossFunction, LossType +from nemo_rl.algorithms.loss_functions import SequencePackingLossWrapper +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.model_utils import ( + allgather_cp_sharded_tensor, + distributed_vocab_topk, + get_logprobs_from_vocab_parallel_logits, +) +from nemo_rl.models.huggingface.common import ( + get_flash_attention_kwargs, + pack_sequences, +) +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.interfaces import ( + LogprobOutputSpec, + ReferenceLogprobOutputSpec, + ScoreOutputSpec, +) +from nemo_rl.models.policy.utils import ( + configure_dynamo_cache, + get_gpu_info, + get_runtime_env_for_policy_worker, + import_class_from_path, + resolve_model_class, +) +from nemo_rl.utils.automodel_checkpoint import ( + load_checkpoint, + save_checkpoint, +) +from nemo_rl.utils.checkpoint import CheckpointingConfig +from nemo_rl.utils.nsys import wrap_with_nvtx_name +from nemo_rl.utils.packed_tensor import packed_broadcast_producer + + +@ray.remote( + runtime_env=get_runtime_env_for_policy_worker("dtensor_policy_worker_v2") +) # pragma: no cover +class DTensorPolicyWorkerV2: + def __repr__(self) -> str: + """Customizes the actor's prefix in the Ray logs. + + This makes it easier to identify which worker is producing specific log messages. 
+ """ + if torch.distributed.is_initialized(): + return f"{self.__class__.__qualname__}[rank={torch.distributed.get_rank()}]" + else: + return f"{self.__class__.__qualname__}" + + def __init__( + self, + config: PolicyConfig, + tokenizer: AutoTokenizer, + processor: Optional[AutoProcessor] = None, + weights_path: Optional[str] = None, + optimizer_path: Optional[str] = None, + init_optimizer: bool = True, + init_reference_model: bool = True, + **kwargs: Any, + ): + """Initialize the DTensorPolicyWorkerV2.""" + self.tokenizer = tokenizer + self.processor = processor + self.is_vlm = processor is not None + + print(f"Initializing DTensorPolicyWorkerV2 with is_vlm={self.is_vlm}") + + self.is_generation_colocated = None + if "generation" in config and config["generation"] is not None: + self.is_generation_colocated = config["generation"]["colocated"]["enabled"] + + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. + # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. + if not self.is_generation_colocated: + os.environ["NCCL_CUMEM_ENABLE"] = "1" + + # Disable dynamo autotune_local_cache to avoid crash when there's already a cache + # with different order of node_bundles + configure_dynamo_cache() + + self.cfg = config + # torch distributed init. 
Envars for rank, world_size, and master_addr and master_port are set from the ray remote call + torch.distributed.init_process_group(backend="nccl") + self.rank = torch.distributed.get_rank() + world_size = torch.distributed.get_world_size() + model_name = self.cfg["model_name"] + + self.cpu_offload = self.cfg["dtensor_cfg"]["cpu_offload"] + self.offload_optimizer_for_logprob = self.cfg["offload_optimizer_for_logprob"] + self.max_grad_norm = self.cfg["max_grad_norm"] + + if self.cfg["precision"] == "float32": + self.dtype = torch.float32 + elif self.cfg["precision"] == "bfloat16": + self.dtype = torch.bfloat16 + elif self.cfg["precision"] == "float16": + self.dtype = torch.float16 + else: + raise ValueError(f"Unknown precision: {self.cfg['precision']}") + + print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") + self.enable_seq_packing = self.cfg["sequence_packing"]["enabled"] + if self.enable_seq_packing: + assert not self.is_vlm, ( + "Sequence packing is not supported for VLM models. Please set policy.sequence_packing.enabled = False to train VLM models." + ) + print( + f"[Rank {self.rank}] Sequence packing is enabled for model {model_name}" + ) + print(f"[Rank {self.rank}] Using FlashAttention2 for sequence packing") + + hf_config_overrides = self.cfg.get("hf_config_overrides", {}) or {} + + model_config = AutoConfig.from_pretrained( + model_name, + # Always load the model in float32 to keep master weights in float32. + # Keeping the master weights in lower precision has shown to cause issues with convergence. 
+ torch_dtype=torch.float32, + trust_remote_code=True, + **sliding_window_overwrite( + model_name + ), # due to https://github.com/huggingface/transformers/issues/38002 + attn_implementation="flash_attention_2" + if self.enable_seq_packing + else None, + **hf_config_overrides, + ) + + self.allow_flash_attn_args = self.check_model_allow_flash_attn_args( + model_config + ) + + self._is_reward_model = ( + "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"] + ) + if self._is_reward_model: + # Ensure sequence packing is disabled. + if self.enable_seq_packing: + raise NotImplementedError( + "Sequence packing is not supported for reward models" + ) + # Load model as a Reward Model. + rm_type = self.cfg["reward_model_cfg"]["reward_model_type"] + if rm_type == "bradley_terry": + model_class = NeMoAutoModelForSequenceClassification + if model_config.num_labels != 1: + # For Bradley-Terry reward models, the linear head has a single output. + # In the transformers library, the default setting for model_config.num_labels is 2 + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/configuration_utils.py#L259). + # Since num_labels is used as the out_features for the linear head + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/modeling_llama.py#L738) + # if num_labels is not 1, we set it to 1. This change may trigger a warning that some weights are not initialized + # from the model checkpoint and are instead initialized using model_config.initializer_range + # (https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/llama/configuration_llama.py#L62). + print( + "model_config.num_labels is not 1. Setting it to 1 since this value is used as the out_features " + "for the linear head of Bradley-Terry reward models." 
+ ) + model_config.num_labels = 1 + else: + raise ValueError(f"Unknown reward model type: {rm_type}") + else: + # DO NOT assume AutoModelForCausalLM, multimodal models can inherit from AutoModelForImageTextToText, AutoModelForTextToWaveform, etc. + model_class = resolve_model_class(model_config.model_type) + + full_state_dict = None + model_state_dict_keys = None + if self.rank == 0: + print(f"[Rank {self.rank}] Loading model {model_name} on CPU...") + model = model_class.from_pretrained( + model_name, + device_map="cpu", # load weights onto CPU initially + trust_remote_code=True, + config=model_config, + use_liger_kernel=False, + torch_dtype=str(model_config.torch_dtype), + ) + + full_state_dict = model.state_dict() + # Store the original model state dict keys before any parallelization + model_state_dict_keys = list(full_state_dict.keys()) + del model + + print(f"[Rank {self.rank}] Initializing empty model for FSDP...") + # All ranks initialize model on meta device, so FSDP can shard it. + # The actual weights will be broadcast from rank 0. + + with init_empty_weights(): + # NeMoAutoModelForCausalLM uses flash_attention_2 by default + # so we need to set it to None if sequence packing is disabled + # https://github.com/NVIDIA-NeMo/Automodel/blob/7e748be260651349307862426c0c168cebdeeec3/nemo_automodel/components/_transformers/auto_model.py#L180 + self.model = model_class.from_config( + model_config, + attn_implementation="flash_attention_2" + if self.enable_seq_packing + else None, + use_liger_kernel=False, + trust_remote_code=True, + torch_dtype=str(model_config.torch_dtype), + ) + + if self.model.config.pad_token_id is None: + self.model.config.pad_token_id = tokenizer.pad_token_id + + tp_size = self.cfg["dtensor_cfg"]["tensor_parallel_size"] + cp_size = self.cfg["dtensor_cfg"]["context_parallel_size"] + if cp_size > 1 and self.enable_seq_packing: + raise ValueError( + "Context parallel is not supported for sequence packing. 
Refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details." + ) + dp_size = world_size // tp_size // cp_size + sequence_parallel_enabled = self.cfg["dtensor_cfg"]["sequence_parallel"] + assert world_size == dp_size * tp_size * cp_size, ( + f"World size({world_size}) must equal to dp_size({dp_size}) * tp_size({tp_size}) * cp_size({cp_size}) to use DTensor" + ) + + if sequence_parallel_enabled and tp_size == 1: + print( + "[WARNING]: sequence_parallel=True, but tp_size=1 which has no effect. Enable tp_size > 1 to use sequence parallelism." + ) + elif sequence_parallel_enabled and tp_size > 1: + raise RuntimeError( + "Sequence parallel + tp_size >1 is currently broken in torch==2.8.0. See https://github.com/NVIDIA-NeMo/Automodel/issues/652 for more details." + ) + + if cp_size > 1: + assert not isinstance(self.model, Gemma3ForCausalLM), ( + "Context parallel is not supported for Gemma3ForCausalLM. Torch context parallel has many limitations. " + "Please refer to https://github.com/NVIDIA/NeMo-RL/blob/main/docs/model-quirks.md#context-parallel-with-fsdp2 for more details." + ) + + assert not (tp_size > 1 and sequence_parallel_enabled), ( + "It's a known issue that context parallel can't be used together with sequence parallel in DTensor worker. " + "Please either set cp_size = 1 or disable sequence parallel. " + "See https://github.com/NVIDIA-NeMo/RL/issues/659 for more details." + ) + + assert not self.is_vlm, ( + "Context parallel is yet not supported for VLM models. Please set cp_size = 1 to train VLM models." 
+ ) + + # For FSDP2 compatibility, we need to support HSDP structure + # For now, we use dp_replicate_size = 1 (no hybrid sharding) + dp_replicate_size = 1 + dp_shard_size = dp_size + + # torch==2.8 uses LOCAL_RANK to set the device here (https://github.com/pytorch/pytorch/blob/ba56102387ef21a3b04b357e5b183d48f0afefc7/torch/distributed/device_mesh.py#L500), + # but CUDA_VISIBLE_DEVICES is set to only 1 gpu, so we need to temporarily set LOCAL_RANK to 0. + # TODO: consider changing the default LOCAL_RANK set in worker_groups.py + prev_local_rank = os.environ["LOCAL_RANK"] + os.environ["LOCAL_RANK"] = "0" + + # Create device mesh with HSDP structure for FSDP2 compatibility + device_mesh = torch.distributed.device_mesh.init_device_mesh( + "cuda", + (dp_replicate_size, dp_shard_size, cp_size, tp_size), + mesh_dim_names=("dp_replicate", "dp_shard", "cp", "tp"), + ) + os.environ["LOCAL_RANK"] = prev_local_rank + + # Create flattened submeshes for different use cases + # Flatten dp_replicate + dp_shard for the "dp" dimension (backward compatibility) + device_mesh[("dp_replicate", "dp_shard")]._flatten(mesh_dim_name="dp") + + # Flatten dp_shard + cp for FSDP2 sharding + device_mesh[("dp_shard", "cp")]._flatten(mesh_dim_name="dp_shard_cp") + + # Flatten dp_replicate + dp_shard + cp for gradient operations + device_mesh[("dp_replicate", "dp_shard", "cp")]._flatten(mesh_dim_name="dp_cp") + + # Store mesh references for backward compatibility + self.dp_cp_mesh = device_mesh["dp_cp"] + self.dp_mesh = device_mesh["dp"] + self.tp_mesh = device_mesh["tp"] + self.cp_mesh = device_mesh["cp"] + + self.dp_size = dp_size + self.tp_size = tp_size + self.cp_size = cp_size + self.device_mesh = device_mesh + + # ------------------------------------------------ + # 3) Move to GPU + Composable FSDP + # (Initialize device mesh, shard submodules, then shard entire model) + # ------------------------------------------------ + self.model = fsdp2_strategy_parallelize( + self.model, + 
device_mesh=self.device_mesh, + mp_policy=MixedPrecisionPolicy( + param_dtype=self.dtype, + reduce_dtype=torch.float32, + output_dtype=torch.float32, + ), + offload_policy=CPUOffloadPolicy(pin_memory=False) + if self.cpu_offload + else OffloadPolicy(), + sequence_parallel=sequence_parallel_enabled, + activation_checkpointing=self.cfg["dtensor_cfg"][ + "activation_checkpointing" + ], + tp_shard_plan=self.cfg["dtensor_cfg"]["custom_parallel_plan"], + dp_replicate_mesh_name="dp_replicate", + dp_shard_cp_mesh_name="dp_shard_cp", + tp_mesh_name="tp", + ) + + print(f"[Rank {self.rank}] Loading state dict from rank 0...") + # This will broadcast the state dict from rank 0 to all other ranks + # and load it into the FSDP model. + set_model_state_dict( + self.model, + model_state_dict=full_state_dict, + options=StateDictOptions( + full_state_dict=True, + broadcast_from_rank0=True, + ), + ) + + # Broadcast model state dict keys to all ranks and store as instance variable + keys_to_broadcast = [model_state_dict_keys] + torch.distributed.broadcast_object_list(keys_to_broadcast, src=0) + self.model_state_dict_keys = keys_to_broadcast[0] + + # Handle tied word embeddings after loading the state dict + # We need to actually tie the parameters at the model level + is_tied_lm_head = hasattr(self.model, "lm_head") and getattr( + getattr(self.model, "config", {}), "tie_word_embeddings", False + ) + if is_tied_lm_head: + embed_tokens_weight = None + for name, param in self.model.named_parameters(): + if "embed_tokens" in name and name.endswith(".weight"): + embed_tokens_weight = param + break + + if embed_tokens_weight is not None: + self.model.lm_head.weight = embed_tokens_weight + + # Manually broadcast buffers + for _, buf in self.model.named_buffers(): + torch.distributed.broadcast(to_local_if_dtensor(buf), src=0) + + if self.cpu_offload: + self.model = self.move_to_device(self.model, "cpu") + + if init_reference_model: + self.reference_model_state_dict = get_cpu_state_dict( + 
self.model.state_dict().items(), pin_memory=True + ) + + if init_optimizer: + optimizer_cls = import_class_from_path(self.cfg["optimizer"]["name"]) + self.optimizer = optimizer_cls( + self.model.parameters(), **self.cfg["optimizer"]["kwargs"] + ) + else: + self.optimizer = None + + if "scheduler" in self.cfg and self.optimizer is not None: + if isinstance(self.cfg["scheduler"], dict): + scheduler_cls = import_class_from_path( + cast(str, self.cfg["scheduler"]["name"]) + ) + self.scheduler = scheduler_cls( + self.optimizer, **self.cfg["scheduler"]["kwargs"] + ) + else: + schedulers = [] + for scheduler_cfg in self.cfg["scheduler"]: + if "name" in scheduler_cfg: + schedulers.append( + import_class_from_path(scheduler_cfg["name"])( + self.optimizer, **scheduler_cfg["kwargs"] + ) + ) + else: + assert "milestones" in scheduler_cfg, ( + "unknown scheduler config: ", + scheduler_cfg, + ) + milestones: list[int] = scheduler_cfg["milestones"] + + self.scheduler = torch.optim.lr_scheduler.SequentialLR( + self.optimizer, schedulers, milestones + ) + + elif self.optimizer is not None: + ## default to a passthrough LR schedule + self.scheduler = torch.optim.lr_scheduler.LambdaLR( + self.optimizer, lr_lambda=lambda epoch: 1 + ) + + # restore + if weights_path: + self.load_checkpoint(weights_path, optimizer_path) + else: + print( + "No weights path provided. 
Starting from scratch (default policy init)" + ) + + def _apply_temperature_scaling(self, logits: torch.Tensor) -> torch.Tensor: + if "generation" in self.cfg and self.cfg["generation"] is not None: + logits.div_(self.cfg["generation"]["temperature"]) + return logits + + def init_collective( + self, ip: str, port: int, world_size: int, *, train_world_size: int + ) -> None: + from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator + from vllm.distributed.utils import StatelessProcessGroup + + pg = StatelessProcessGroup.create( + host=ip, port=port, rank=self.rank, world_size=world_size + ) + device = torch.cuda.current_device() + self.model_update_group = PyNcclCommunicator(pg, device=device) + + def is_alive(self) -> bool: + return True + + def check_model_allow_flash_attn_args(self, model_config) -> bool: + # Some models doesn't support flash_attn_kwargs + # Check nemotron nas. + if ( + model_config.architectures[0] == "DeciLMForCausalLM" + and model_config.model_type == "nemotron-nas" + ): + return False + + return True + + def reset_peak_memory_stats(self) -> None: + torch.cuda.reset_peak_memory_stats() + + def get_gpu_info(self) -> dict[str, Any]: + """Return information about the GPU being used by this worker.""" + return get_gpu_info(self.model) + + @wrap_with_nvtx_name("dtensor_policy_worker_v2/train") + def train( + self, + data: BatchedDataDict[Any], + loss_fn: LossFunction, + eval_mode: bool = False, + gbs: Optional[int] = None, + mbs: Optional[int] = None, + ) -> dict[str, Any]: + """Train the policy on a batch of data with a given loss function.""" + if gbs is None: + gbs = self.cfg["train_global_batch_size"] + if mbs is None: + mbs = self.cfg["train_micro_batch_size"] + local_gbs = gbs // self.dp_size + total_dataset_size = torch.tensor(data.size, device="cuda") + torch.distributed.all_reduce( + total_dataset_size, + op=torch.distributed.ReduceOp.SUM, + group=self.dp_mesh.get_group(), + ) + num_global_batches = 
int(total_dataset_size.item()) // gbs + + # dim 1 is always assumed to be the sequence dim, sanity check this here + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + for k, v in data.items(): + if torch.is_tensor(v) and len(v.shape) > 1: + assert v.shape[sequence_dim] == seq_dim_size, ( + f"Dim 1 must be the sequence dim, expected dim 1={seq_dim_size} but got shape {v.shape}" + ) + + if eval_mode: + ctx: AbstractContextManager[Any] = torch.no_grad() + self.model.eval() + else: + ctx = nullcontext() + # Ensure model is in training mode + self.model.train() + + with ctx: + # Get data from batch and move to device + data.to("cuda") + + losses = [] + all_mb_metrics = [] + for gb_idx in range(num_global_batches): + global_batch = data.get_batch(batch_idx=gb_idx, batch_size=local_gbs) + + assert "sample_mask" in global_batch, ( + "sample_mask must be present in the data!" + ) + ## get the normalization factor for the loss + local_valid_seqs = torch.sum(global_batch["sample_mask"]) + + if not "token_mask" in global_batch: + local_valid_toks = ( + local_valid_seqs * global_batch["input_ids"].shape[1] + ) + else: + local_valid_toks = torch.sum( + global_batch["token_mask"][:, 1:] + * global_batch["sample_mask"].unsqueeze(-1) + ) + + to_reduce = torch.tensor([local_valid_seqs, local_valid_toks]).cuda() + torch.distributed.all_reduce(to_reduce, group=self.dp_mesh.get_group()) + global_valid_seqs, global_valid_toks = to_reduce[0], to_reduce[1] + + if ( + hasattr(loss_fn, "loss_type") + and loss_fn.loss_type == LossType.TOKEN_LEVEL + ): + assert "token_mask" in global_batch, ( + "token_mask must be present in the data when using token-level loss" + ) + + self.optimizer.zero_grad() + mb_losses = [] + batch = data.get_batch(batch_idx=gb_idx, batch_size=local_gbs) + # Calculate number of microbatches to process + # make_microbatch_iterator assumes that the batch size is a multiple of the microbatch size + # so its safe to not check for the case where 
the last data slice is smaller than mbs + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + mb_iterator = batch.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = batch.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = ( + batch.make_microbatch_iterator_for_packable_sequences() + ) + iterator_len, max_seqlen = ( + batch.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. + dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = ( + batch.make_microbatch_iterator_for_packable_sequences() + ) + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = batch.make_microbatch_iterator(mbs) + iterator_len = batch.size // mbs + + empty_cache_steps = self.cfg.get("dtensor_cfg", {}).get( + "clear_cache_every_n_steps" + ) + if empty_cache_steps: + warnings.warn( + f"Emptying cache every {empty_cache_steps} microbatches, doing so unnnecessarily would incur a large performance overhead." 
+ ) + + for mb_idx, mb in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + # Conditioanlly empty cache when sensitive to fragmentation + if empty_cache_steps and mb_idx % empty_cache_steps == 0: + torch.cuda.empty_cache() + + with torch.autocast(device_type="cuda", dtype=self.dtype): + if self.enable_seq_packing: + input_ids = mb.get("input_ids").cuda() + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=mb["input_lengths"], + packed_sequence_size=[ + len(mb["input_lengths"]) + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + min_seq_len=self.cfg["sequence_packing"][ + "train_mb_tokens" + ], # TODO: this is a WAR for sequence packing, we should fix this. Without this, backward will fail when TP is enabled. + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=mb["input_lengths"], + ) + + else: + input_ids = mb.get("input_ids").cuda() + batch_size, seq_len = input_ids.shape + + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.bool, + device=input_ids.device, + ) + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + flash_attn_kwargs = {} + + # add vlm kwargs to model call + vlm_kwargs = mb.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) + if len(vlm_kwargs) > 0: + position_ids = None + assert not self.cfg["dtensor_cfg"]["sequence_parallel"], ( + "Sequence parallel is not supported with multimodal since there's an issue when you do not pass position_ids. 
See https://github.com/NVIDIA-NeMo/Automodel/issues/652" + ) + + context_parallel_ctx = None + if self.cp_size > 1: + assert len(vlm_kwargs) == 0, ( + f"multimodal kwargs={vlm_kwargs} are not supported for context parallel" + ) + seq_index = torch.arange( + seq_len, device=input_ids.device + ).repeat(1, 1) + cp_buffers = ( + [input_ids, position_ids, seq_index] + if self.cp_size > 1 + else [] + ) + + # Create context parallel context + context_parallel_ctx = create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + + with get_train_context(False, False, context_parallel_ctx)(): + with torch.autocast(device_type="cuda", dtype=self.dtype): + model_args = dict( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, + **vlm_kwargs, + ) + + if self._is_reward_model: + # `flash_attn_kwarg` is not supported for `LlamaForSequenceClassification`. + # Note that it should be empty anyway since sequence packing + # is not supported for reward models. 
+ assert not flash_attn_kwargs + del model_args["flash_attn_kwargs"] + # remove flash_attn_kwargs if there are multimodal kwargs + if len(vlm_kwargs) > 0: + del model_args["flash_attn_kwargs"] + + if ( + not self.allow_flash_attn_args + and "flash_attn_kwargs" in model_args + ): + del model_args["flash_attn_kwargs"] + + outputs = self.model(**model_args) + + # Get logprobs + if not hasattr(outputs, "logits"): + logits = self.model.lm_head(outputs.last_hidden_state) + else: + logits = outputs.logits + del outputs + + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + + if self.cp_size > 1: + seq_index_dtensor = ( + DTensor.from_local( + seq_index, + device_mesh=self.cp_mesh, + placements=[Shard(1)], + ) + .full_tensor() + .squeeze(0) + ) + + mb["seq_index"] = seq_index_dtensor + + for tensor_name in mb: + current_tensor = mb[tensor_name] + for buffer in cp_buffers: + if current_tensor is buffer: + assert type(current_tensor) == torch.Tensor, ( + f"tensor {tensor_name} is not a tensor" + ) + mb[tensor_name] = DTensor.from_local( + current_tensor, + device_mesh=self.cp_mesh, + placements=[Shard(sequence_dim)], + ) + break + + if isinstance(logits, DTensor): + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + + if self.enable_seq_packing: + loss_fn_ = SequencePackingLossWrapper( + loss_fn=loss_fn, + cu_seqlens_q=flash_attn_kwargs.cu_seqlens_q, + cu_seqlens_q_padded=flash_attn_kwargs.cu_seqlens_q, + ) + else: + loss_fn_ = loss_fn + + loss, loss_metrics = loss_fn_( + 
logits, + mb, + global_valid_seqs, + global_valid_toks, + ) + del logits + + # skip the update for dummy batches + if mb_idx < iterator_len: + ## scale by the number of global batches so we get the correct + ## value when summing metrics across all microbatches + for k in loss_metrics.keys(): + loss_metrics[k] /= num_global_batches + num_valid_samples = loss_metrics["num_valid_samples"] + loss_metrics["lr"] = self.optimizer.param_groups[0]["lr"] + loss_metrics["global_valid_seqs"] = global_valid_seqs.item() + loss_metrics["global_valid_toks"] = global_valid_toks.item() + else: + loss *= 0 + + # Backward pass + if not eval_mode: + ## NOTE: invalid samples should be multiplied + ## by zero in the loss function to prevent them + ## from affecting the gradient calculation + + # when FSDP reduces the gradients over the DP dim, they're automatically averaged + # but we want to sum them so we cancel out the average here + loss *= self.dp_size * self.cp_size + loss.backward() + + if num_valid_samples > 0: + mb_losses.append(loss.item()) + all_mb_metrics.append(loss_metrics) + + grad_norm: Optional[float | torch.Tensor] = None + if not eval_mode: + with torch.no_grad(): + grad_norm = get_grad_norm( + self.model.parameters(), + dp_cp_group=self.dp_cp_mesh.get_group(), + tp_group=self.tp_mesh.get_group(), + dtype=torch.float32, + ) + if self.max_grad_norm is not None: + clip_grad_by_total_norm_( + self.model.parameters(), + max_grad_norm=self.max_grad_norm, + total_norm=grad_norm, + ) + grad_norm = torch.tensor([grad_norm]) + + # Update parameters + self.optimizer.step() + + losses.append(torch.tensor(mb_losses).sum().item()) + + # release gradient memory before rollouts + self.optimizer.zero_grad() + # increment scheduler after all batches in rollout are processed + if not eval_mode: + self.scheduler.step() + # dynamic batch and sequence dims causes alot of fragmentation, so clear + # the memory allocator before moving on + torch.cuda.empty_cache() + + # Compute global loss 
across all ranks + with torch.no_grad(): + global_loss = torch.tensor(losses, device="cuda") + torch.distributed.all_reduce( + global_loss, group=self.dp_mesh.get_group() + ) + # Aggregate metrics across all microbatches + mb_metrics = defaultdict(list) + for m in all_mb_metrics: + for k, v in m.items(): + mb_metrics[k].append(v) + + metrics = { + "global_loss": global_loss.cpu(), + "grad_norm": grad_norm, + "rank": torch.distributed.get_rank(), + "gpu_name": torch.cuda.get_device_name(), + "model_dtype": self.dtype, + "all_mb_metrics": dict(mb_metrics), + } + + return metrics + + # TODO @Rayen Tian: Related Issue: Refactor shared logic between score() and get_logprobs() (https://github.com/NVIDIA-NeMo/RL/issues/1094) + @wrap_with_nvtx_name("dtensor_policy_worker_v2/get_logprobs") + def get_logprobs( + self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None + ) -> BatchedDataDict[LogprobOutputSpec]: + """Get the logprobs of the model for a batch of data. + + Uses the configured logprob_batch_size to do microbatching. + + Input data is assumed to be right-padded. The method internally converts to + left-padded format for computation, and returns outputs in right-padded format. + + Returns: + a BatchedDataDict with key "logprobs" and shape [batch_size, sequence_length]. + We use the convention that the logprob of the first token is 0 so that the sequence length is maintained. + The logprob of input token i is specified at position i in the output logprobs tensor. 
+ """ + logprob_batch_size = ( + micro_batch_size + if micro_batch_size is not None + else self.cfg["logprob_batch_size"] + ) + logprob_chunk_size = self.cfg.get("logprob_chunk_size", None) + + # dim 1 is always assumed to be the sequence dim, sanity check this here + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + for k, v in data.items(): + if torch.is_tensor(v) and len(v.shape) > 1: + assert v.shape[sequence_dim] == seq_dim_size, ( + f"Dim 1 must be the sequence dim, expected dim 1={seq_dim_size} but got shape {v.shape}" + ) + + all_log_probs = [] + self.model.eval() + + with torch.no_grad(): + data.to("cuda") + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. 
+ dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = data.make_microbatch_iterator(logprob_batch_size) + iterator_len = data.size // logprob_batch_size + + step = 0 + for batch_idx, lp_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + step += 1 + input_ids = lp_batch.get("input_ids").cuda() + input_lengths = lp_batch.get("input_lengths") + vlm_kwargs = lp_batch.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) + + batch_size, seq_len = input_ids.shape + if self.enable_seq_packing: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are not supported for sequence packing" + ) + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Create post_attention_mask for right-padded data for masking token after forwarding. 
+ post_attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.bool, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + # For right-padded sequence, set 1s at the beginning of the sequence + post_attention_mask[i, :length] = 1 + + # explicitly create position ids for the input, otherwise the sharding + # for DTensor will be incorrect + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + flash_attn_kwargs = {} + + # DTensor requires the casual attention kernel to hit, + # yet our attention mask above is not always all 1s + # this is fine because we mask with the actual attention mask + # later, but for input it has to be all 1s + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.bool, + device=input_ids.device, + ) + + # if there are multimodal kwargs, we don't need to add position_ids (computed internally) + if len(vlm_kwargs) > 0: + position_ids = None + + context_parallel_ctx = None + if self.cp_size > 1: + assert len(vlm_kwargs) == 0, ( + "multimodal kwargs are not supported for context parallel" + ) + seq_index = torch.arange(seq_len, device=input_ids.device).repeat( + 1, 1 + ) + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + + with get_train_context(False, False, context_parallel_ctx)(): + with torch.autocast(device_type="cuda", dtype=self.dtype): + model_args = dict( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, + **vlm_kwargs, + ) + if len(vlm_kwargs) > 0: + del model_args["flash_attn_kwargs"] + + if ( + not self.allow_flash_attn_args + and "flash_attn_kwargs" in model_args + ): + del model_args["flash_attn_kwargs"] + + outputs = 
self.model(**model_args) + + logits = outputs.logits + + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + + if self.cp_size > 1: + seq_index_tensor = ( + DTensor.from_local( + seq_index, + device_mesh=self.cp_mesh, + placements=[Shard(1)], + ) + .full_tensor() + .squeeze(0) + ) + + input_ids_dtensor = DTensor.from_local( + input_ids, + device_mesh=self.cp_mesh, + placements=[Shard(sequence_dim)], + ) + + if isinstance(logits, DTensor): + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + + token_logprobs = get_logprobs_from_vocab_parallel_logits( + logits, + input_ids_dtensor, + seq_index_tensor, + chunk_size=logprob_chunk_size, + ) + + assert token_logprobs.shape[1] == seq_len - 1 + else: + if isinstance(logits, DTensor): + token_logprobs = get_logprobs_from_vocab_parallel_logits( + logits, + input_ids, + chunk_size=logprob_chunk_size, + ) + else: + if logprob_chunk_size is not None: + logits_seq_len = int(logits.shape[1]) + num_chunks = ( + logits_seq_len + logprob_chunk_size - 1 + ) // logprob_chunk_size + chunked_log_probs = [] + for chunk_idx in range(num_chunks): + chunk_start = chunk_idx * logprob_chunk_size + chunk_end = min( + logits_seq_len, + (chunk_idx + 1) * logprob_chunk_size, + ) + chunk_logits = logits[ + :, chunk_start:chunk_end, : + ].to(torch.float32) + log_probs = torch.nn.functional.log_softmax( + chunk_logits, dim=-1 + ) + chunked_log_probs.append(log_probs) + log_probs = torch.cat(chunked_log_probs, dim=1) + del chunked_log_probs + else: + 
logits = logits.to(torch.float32) + log_probs = torch.nn.functional.log_softmax( + logits, dim=-1 + ) + # Extract logprobs for each token in the sequence by gathering the logprob + # corresponding to the next token at each position + # Input shapes: + # log_probs: [batch_size, sequence_length, vocab_size] - logits for each position + # token_ids: [batch_size, sequence_length] - actual tokens + # Output shape: [batch_size, sequence_length] - logprob of each token given previous + # We get logprob of token[t+1] from logits[t], prepending 0 to maintain sequence length + next_tokens = input_ids[:, 1:] + log_probs = log_probs[:, :-1] + token_logprobs = log_probs.gather( + dim=-1, index=next_tokens.unsqueeze(-1) + ).squeeze(-1) + del log_probs + + del outputs, logits + + token_logprobs = torch.cat( + [torch.zeros_like(token_logprobs[:, :1]), token_logprobs], dim=1 + ) + + # skip keeping the logprobs for the dummy batches + if batch_idx >= iterator_len: + continue + + if not self.enable_seq_packing: + # Apply mask to zero out padding tokens logprobs + token_logprobs = token_logprobs * post_attention_mask + else: + # For packed sequences, unpack logprobs + unpacked_logprobs = torch.zeros( + (batch_size, seq_dim_size), + dtype=token_logprobs.dtype, + device=token_logprobs.device, + ) + cu_seqlens = flash_attn_kwargs.cu_seqlens_q + for i in range(batch_size): + start = cu_seqlens[i].item() + 1 + end = cu_seqlens[i + 1].item() + seq_len_actual = input_lengths[i].item() + unpacked_logprobs[i, 1:seq_len_actual] = token_logprobs[ + 0, start:end + ] + token_logprobs = unpacked_logprobs + + all_log_probs.append(token_logprobs) + + # Concatenate all batches + return_data = BatchedDataDict[LogprobOutputSpec]() + + all_log_probs_padded = [] + for lp in all_log_probs: + padding_needed = seq_dim_size - lp.shape[1] + if padding_needed > 0: + lp = torch.nn.functional.pad( + lp, (0, padding_needed), mode="constant", value=0.0 + ) + all_log_probs_padded.append(lp) + return_data["logprobs"] 
= torch.cat(all_log_probs_padded, dim=0).cpu() + + return return_data + + # TODO @Rayen Tian: Related Issue: Refactor shared logic between score() and get_logprobs() (https://github.com/NVIDIA-NeMo/RL/issues/1094) + @wrap_with_nvtx_name("dtensor_policy_worker_v2/score") + def score(self, data: BatchedDataDict) -> BatchedDataDict[ScoreOutputSpec]: + global_batch_size = min(self.cfg["batch_size"], data.size) + + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + for k, v in data.items(): + if torch.is_tensor(v) and len(v.shape) > 1: + assert v.shape[sequence_dim] == seq_dim_size, ( + f"Dim 1 must be the sequence dim, expected dim 1={seq_dim_size} but got shape {v.shape}" + ) + self.model.eval() + print("Begin to batch datas") + with torch.no_grad(): + data.to("cuda") + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = data.make_microbatch_iterator(global_batch_size) + iterator_len = data.size // global_batch_size + step = 0 + all_rm_scores = [] + for batch_idx, generate_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + step += 1 + input_ids = generate_batch.get("input_ids").cuda() + input_lengths = generate_batch.get("input_lengths") + batch_size, seq_len = input_ids.shape + if 
self.enable_seq_packing: + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Create attention mask for right-padded data + post_attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.bool, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + # For right-padded sequence, set 1s at the beginning of the sequence + post_attention_mask[i, :length] = 1 + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + + attention_mask = torch.ones( + (batch_size, seq_len), + dtype=torch.bool, + device=input_ids.device, + ) + context_parallel_ctx = None + if self.cp_size > 1: + seq_index = torch.arange(seq_len, device=input_ids.device).repeat( + 1, 1 + ) + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + with get_train_context(False, False, context_parallel_ctx)(): + with torch.autocast(device_type="cuda", dtype=self.dtype): + model_args = dict( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + use_cache=False, + ) + outputs = self.model(**model_args) + + if not hasattr(outputs, "logits"): + logits = self.model.lm_head(outputs.last_hidden_state) + else: + logits = outputs.logits + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + if isinstance(logits, DTensor): + logits = logits.to(torch.float32) + else: + logits = outputs.logits.to(torch.float32) + + 
rm_scores = to_local_if_dtensor(logits) + rm_scores = rm_scores.squeeze(-1) + all_rm_scores.append(rm_scores) + + all_rm_scores = torch.cat(all_rm_scores, dim=0) + all_rm_scores = all_rm_scores.squeeze(-1).cpu() + return_data = BatchedDataDict[ScoreOutputSpec]( + { + "scores": all_rm_scores, + } + ) + return return_data + + @wrap_with_nvtx_name("dtensor_policy_worker_v2/get_topk_logits") + def get_topk_logits( + self, + data: BatchedDataDict[Any], + k: int, + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[Any]: + """Return per-position top-k logits and corresponding global indices. + + Notes: + - Return shapes are [B, S, k]. + - Computes top-k over the full sequence (no trimming of the last position). + - If alignment with next-token targets is required, the caller should handle it. + - If logits are TP-sharded DTensor, performs distributed global top-k across TP. + - Supports context parallelism with proper CP gather. + - Otherwise, computes local top-k on full-vocab tensor. 
+ """ + topk_batch_size = ( + micro_batch_size + if micro_batch_size is not None + else self.cfg["logprob_batch_size"] + ) + + sequence_dim = 1 + seq_dim_size = data.get("input_ids").shape[sequence_dim] + + out_topk_vals = [] + out_topk_idx = [] + self.model.eval() + + with torch.no_grad(): + data.to("cuda") + dummy_iterator = iter([]) + if self.cfg["dynamic_batching"]["enabled"]: + # dynamic batching support (no CP/packed) + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + elif self.enable_seq_packing: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + iterator_len, max_seqlen = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + max_batch_ct = torch.tensor([iterator_len], device="cuda") + torch.distributed.all_reduce( + max_batch_ct, op=torch.distributed.ReduceOp.MAX + ) + + # Sequence packing can end up with unevenly distributed batch counts across DP ranks. + # We add dummy batches to the end of the iterator to make the batch counts equal. 
+ dummy_batch_ct = int(max_batch_ct.item() - iterator_len) + dummy_iterator = data.make_microbatch_iterator_for_packable_sequences() + dummy_iterator = itertools.islice( + itertools.cycle(dummy_iterator), dummy_batch_ct + ) + else: + mb_iterator = data.make_microbatch_iterator(topk_batch_size) + iterator_len = data.size // topk_batch_size + + for batch_idx, lp_batch in enumerate( + itertools.chain(mb_iterator, dummy_iterator) + ): + input_ids = lp_batch.get("input_ids").cuda() + input_lengths = lp_batch.get("input_lengths") + + batch_size, seq_len = input_ids.shape + # Store original shapes for unpacking later + original_batch_size = batch_size + original_seq_len = seq_len + + if self.enable_seq_packing: + input_ids, position_ids, _ = pack_sequences( + input_ids=input_ids, + input_lengths=input_lengths, + packed_sequence_size=[ + batch_size + ], # flash attention 2 expects flattened input + padding_value=self.tokenizer.eos_token_id, + return_attention_mask=False, + ) + seq_len = input_ids.shape[1] + attention_mask = None + flash_attn_kwargs = get_flash_attention_kwargs( + input_lengths=input_lengths, + ) + else: + # Build attention mask (right-padded inputs) + attention_mask = torch.zeros( + (batch_size, seq_len), dtype=torch.long, device=input_ids.device + ) + for i, length in enumerate(input_lengths): + attention_mask[i, :length] = 1 + + position_ids = torch.arange( + seq_len, device=input_ids.device + ).repeat(batch_size, 1) + + flash_attn_kwargs = {} + + with torch.autocast(device_type="cuda", dtype=self.dtype): + attention_mask_input_all_ones = torch.ones( + (batch_size, seq_len), dtype=torch.long, device=input_ids.device + ) + + context_parallel_ctx = None + if self.cp_size > 1: + seq_index = torch.arange(seq_len, device=input_ids.device).repeat( + 1, 1 + ) + cp_buffers = [input_ids, position_ids, seq_index] + + # Create context parallel context + context_parallel_ctx = create_context_parallel_ctx( + cp_mesh=self.cp_mesh, + cp_buffers=cp_buffers, + 
cp_seq_dims=[sequence_dim] * len(cp_buffers), + cp_no_restore_buffers=set(cp_buffers), + ) + + with get_train_context(False, False, context_parallel_ctx)(): + with torch.autocast(device_type="cuda", dtype=self.dtype): + outputs = self.model( + input_ids=input_ids, + attention_mask=attention_mask_input_all_ones, + position_ids=position_ids, + use_cache=False, + flash_attn_kwargs=flash_attn_kwargs, + ) + + if not hasattr(outputs, "logits"): + logits = self.model.lm_head(outputs.last_hidden_state) + else: + logits = outputs.logits + del outputs + + # Apply temperature scaling + logits = self._apply_temperature_scaling(logits) + + if self.cp_size > 1: + if isinstance(logits, DTensor): + # Must be tp sharded + assert ( + logits.device_mesh.ndim == 1 + and logits.device_mesh.mesh_dim_names[0] == "tp" + ), "logits must be tp sharded" + + # CP is implicitly sharded on the seq dim, so we need to redistribute to the tp dim + logits = DTensor.from_local( + logits.to_local(), + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + else: + logits = DTensor.from_local( + logits, + device_mesh=self.device_mesh[("cp", "tp")], + placements=[Shard(sequence_dim), Shard(-1)], + ) + + # deal with TP first + local_logits = logits.to_local() # [B, S_cp, V_tp] + + tp_group = self.tp_mesh.get_group() + tp_rank = torch.distributed.get_rank(tp_group) + V_local = int(local_logits.shape[-1]) + vocab_start_index = tp_rank * V_local + vocab_end_index = (tp_rank + 1) * V_local + + vals, idx = distributed_vocab_topk( + local_logits, + k=k, + tp_group=tp_group, + vocab_start_index=vocab_start_index, + vocab_end_index=vocab_end_index, + ) + # [B, S_cp, k] + + cp_group = self.cp_mesh.get_group() + + vals = allgather_cp_sharded_tensor( + vals, cp_group, seq_dim=sequence_dim + ) + idx = allgather_cp_sharded_tensor( + idx, cp_group, seq_dim=sequence_dim + ) + # [B, S, k] + else: + # Compute top-k over full sequence length (do not drop last position) + if 
isinstance(logits, DTensor): + local_logits = logits.to_local() # [B, S, V_local] + tp_group = self.tp_mesh.get_group() + tp_rank = torch.distributed.get_rank(tp_group) + V_local = int(local_logits.shape[-1]) + vocab_start_index = tp_rank * V_local + vocab_end_index = (tp_rank + 1) * V_local + + vals, idx = distributed_vocab_topk( + local_logits, + k=k, + tp_group=tp_group, + vocab_start_index=vocab_start_index, + vocab_end_index=vocab_end_index, + ) + else: + full_logits = logits.to(torch.float32) + vals, idx = torch.topk(full_logits, k=k, dim=-1) + + # Handle sequence packing unpacking + if self.enable_seq_packing: + # Unpack top-k results from packed format back to original batch format + # vals: [1, packed_seq_len, k] -> [original_batch_size, original_seq_len, k] + # idx: [1, packed_seq_len, k] -> [original_batch_size, original_seq_len, k] + + # Create tensors to store unpacked results + unpacked_vals = torch.zeros( + (original_batch_size, original_seq_len, k), + dtype=vals.dtype, + device=vals.device, + ) + unpacked_idx = torch.zeros( + (original_batch_size, original_seq_len, k), + dtype=idx.dtype, + device=idx.device, + ) + + # Get cumulative sequence lengths for unpacking + cu_seqlens = flash_attn_kwargs.cu_seqlens_q + + for i in range(original_batch_size): + start = cu_seqlens[i].item() + end = cu_seqlens[i + 1].item() + seq_len_actual = input_lengths[i].item() + + # Extract the corresponding portion from packed results + # Note: vals and idx are [1, packed_seq_len, k] due to packing + unpacked_vals[i, :seq_len_actual, :] = vals[0, start:end, :] + unpacked_idx[i, :seq_len_actual, :] = idx[0, start:end, :] + + # Replace with unpacked results + vals = unpacked_vals + idx = unpacked_idx + + # Update batch_size and seq_len for consistency + batch_size = original_batch_size + seq_len = original_seq_len + + # Keep only real sequence tokens (no trimming here; padded positions can be masked downstream) + # Shapes remain [B, S, k]. 
+ out_topk_vals.append(vals.cpu()) + out_topk_idx.append(idx.cpu()) + + ret = BatchedDataDict[Any]() + # Pad each micro-batch result on sequence dim to common length (S), similar to get_logprobs + all_topk_vals_padded = [] + all_topk_idx_padded = [] + target_seq_len = seq_dim_size + for vals, idx in zip(out_topk_vals, out_topk_idx): + pad_needed = target_seq_len - vals.shape[1] + if pad_needed > 0: + # pad along sequence dimension (second dim): (last_dim_pad_left, last_dim_pad_right, seq_pad_left, seq_pad_right, batch_pad_left, batch_pad_right) + vals = torch.nn.functional.pad( + vals, (0, 0, 0, pad_needed, 0, 0), mode="constant", value=0.0 + ) + idx = torch.nn.functional.pad( + idx, (0, 0, 0, pad_needed, 0, 0), mode="constant", value=0 + ) + all_topk_vals_padded.append(vals) + all_topk_idx_padded.append(idx) + + ret["topk_logits"] = ( + torch.cat(all_topk_vals_padded, dim=0) + if len(all_topk_vals_padded) > 1 + else all_topk_vals_padded[0] + ).cpu() + ret["topk_indices"] = ( + torch.cat(all_topk_idx_padded, dim=0) + if len(all_topk_idx_padded) > 1 + else all_topk_idx_padded[0] + ).cpu() + return ret + + @contextmanager + def use_reference_model(self) -> Generator[None, None, None]: + """Context manager that temporarily swaps the reference model and active model. + + On entry: Moves model to CPU, moves reference_model to CUDA. 
Swaps the references + On exit: Restores original references and re-flips cuda/cpu + """ + with torch.no_grad(): + try: + # Save train model state_dict + curr_state_dict = get_cpu_state_dict( + self.model.state_dict().items(), pin_memory=True + ) + + # Swap reference model state_dict to self.model + for k, v in self.model.state_dict().items(): + val = to_local_if_dtensor(v) + val.copy_(self.reference_model_state_dict[k]) + + # - self.model is the original reference_model, now on CUDA + # - curr_state_dict is the train model, now on CPU + yield + + finally: + # Restore train model state_dict + for k, v in self.model.state_dict().items(): + val = to_local_if_dtensor(v) + val.copy_(curr_state_dict[k]) + + @wrap_with_nvtx_name("dtensor_policy_worker_v2/get_reference_policy_logprobs") + def get_reference_policy_logprobs( + self, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None + ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: + """Get the logprobs from the reference policy for a batch of data. + + Returns: + a BatchedDataDict with key "reference_logprobs" and shape [batch_size, sequence_length]. + We use the convention that the logprob of the first token is 0 so that the sequence length is maintained. + The logprob of input token i is specified at position i in the output logprobs tensor. + """ + with self.use_reference_model(): + reference_logprobs = self.get_logprobs(data, micro_batch_size) + + return_data = BatchedDataDict[ReferenceLogprobOutputSpec]() + return_data["reference_logprobs"] = reference_logprobs["logprobs"].cpu() + return return_data + + def _add_noise_to_weights(self) -> None: + """Add small Gaussian noise to the weights of the model. 
Note that this is used for testing purposes only.""" + noise_std = 0.01 # Standard deviation for the noise + for p in self.model.parameters(): + if p.requires_grad: + noise = torch.randn_like(p.data) * noise_std + p.data.add_(noise) # Add noise in-place + torch.cuda.synchronize() + + def return_state_dict(self): + return self.model.state_dict() + + def return_model_config(self) -> dict[str, Any]: + """Return the model configuration as a dictionary. + + Returns: + dict: Model configuration dictionary + """ + return self.model.config + + def report_device_id(self) -> str: + """Report the UUID of the current CUDA device using NVML. + + Returns: + str: UUID of the device in the format "GPU-xxxxx" + """ + from nemo_rl.utils.nvml import get_device_uuid + + # Get current device index from torch + device_idx = torch.cuda.current_device() + # Get device UUID using NVML + return get_device_uuid(device_idx) + + def get_zmq_address(self): + """Get the ZMQ address for the current device.""" + return f"ipc:///tmp/{self.report_device_id()}.sock" + + def maybe_init_zmq(self): + """Initialize the ZMQ socket if it doesn't exist.""" + if not hasattr(self, "zmq_socket"): + self.zmq_context = zmq.Context() + self.zmq_socket = self.zmq_context.socket(zmq.REQ) + self.zmq_socket.setsockopt( + zmq.SNDTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt( + zmq.RCVTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt(zmq.LINGER, 0) + self.zmq_socket.bind(self.get_zmq_address()) + + @torch.no_grad() + def prepare_refit_info(self) -> Optional[dict[str, Any]]: + """Prepare state dict metadata for weight refitting and IPC streaming.""" + state_dict_info = {} + for name, tensor in self.model.state_dict().items(): + # all tensor will be casted to self.dtype in stream_weights_via_ipc_zmq/broadcast_weights_for_collective + state_dict_info[name] = (tensor.shape, self.dtype) + + return state_dict_info + + def get_free_memory_bytes(self) -> int: + """Get the 
available free memory.""" + from nemo_rl.utils.nvml import get_free_memory_bytes + + device_idx = torch.cuda.current_device() + return get_free_memory_bytes(device_idx) + + @torch.no_grad() + @wrap_with_nvtx_name("dtensor_policy_worker_v2/stream_weights_via_ipc_zmq") + def stream_weights_via_ipc_zmq(self, buffer_size_bytes: int = 0) -> None: + """Stream model weights to peer process via ZMQ IPC socket.""" + self.maybe_init_zmq() + # Manually move model to cuda for cpu offload case + if self.cpu_offload: + self.model = self.move_to_cuda(self.model) + + from nemo_rl.models.policy.utils import stream_weights_via_ipc_zmq_impl + + def dtensor_params_generator(): + """Generator that yields (name, tensor) pairs, converting DTensors to local tensors.""" + for name, tensor in self.model.state_dict().items(): + if isinstance(tensor, DTensor): + # Convert DTensor to full tensor for streaming + full_tensor = tensor.full_tensor() + # Convert to target dtype + yield ( + name, + full_tensor.to(self.dtype, non_blocking=True).contiguous(), + ) + else: + # Convert to target dtype + yield name, tensor.to(self.dtype, non_blocking=True).contiguous() + + # Use the shared implementation + stream_weights_via_ipc_zmq_impl( + params_generator=dtensor_params_generator(), + buffer_size_bytes=buffer_size_bytes, + zmq_socket=self.zmq_socket, + rank=self.rank, + worker_name=str(self), + ) + + @torch.no_grad() + def broadcast_weights_for_collective(self) -> None: + """Broadcast the weights for collective communication.""" + # Manually move model to cuda for cpu offload case + if self.cpu_offload: + print( + "[WARNING]: Unless you are lacking of memory, it is not recommended to enable cpu_offload when " + "using non-colocated generation since it will have an extra onload and offload at refit stage." 
+ ) + self.model = self.move_to_cuda(self.model) + + def _dtensor_post_iter_func(tensor, dtype): + if isinstance(tensor, DTensor): + tensor = tensor.full_tensor() + tensor = tensor.to(dtype, non_blocking=True) + return tensor + + # param_iterator will return (name, tensor), we only need tensor + dtensor_post_iter_func = lambda x: _dtensor_post_iter_func(x[1], self.dtype) + + packed_broadcast_producer( + iterator=iter(self.model.state_dict().items()), + group=self.model_update_group, + src=0, + post_iter_func=dtensor_post_iter_func, + ) + + # Manually move model to cpu for cpu offload case + # cpu offload needs model on CPU before model forward + if self.cpu_offload: + self.model = self.move_to_cpu(self.model) + + @wrap_with_nvtx_name("dtensor_policy_worker_v2/prepare_for_lp_inference") + def prepare_for_lp_inference(self) -> None: + # onload model to cuda + if not self.cpu_offload: + self.move_to_cuda(self.model) + else: + self.model = self.move_buffer_to_device(self.model, "cuda") + + self.model.eval() + + # offload optimizer to cpu + torch.randn(1).cuda() # wake up torch allocator + if self.optimizer is not None and self.offload_optimizer_for_logprob: + self.move_optimizer_to_device("cpu") + + gc.collect() + torch.cuda.empty_cache() + + @wrap_with_nvtx_name("dtensor_policy_worker_v2/prepare_for_training") + def prepare_for_training(self, *args, **kwargs) -> None: + # onload models and optimizer state to cuda + if not self.cpu_offload: + self.move_to_cuda(self.model) + else: + # when cpu offload is enabled, the buffers do not get moved + # to cuda automatically, so we need to do that manually + self.model = self.move_buffer_to_device(self.model, "cuda") + + self.model.train() + # Move optimizer state to CUDA if it exists + # colocated generation will always offload optimizer to cuda before refit + if ( + self.optimizer is not None + and not self.cpu_offload + and (self.offload_optimizer_for_logprob or self.is_generation_colocated) + ): + 
self.move_optimizer_to_device("cuda") + + torch.cuda.empty_cache() + + @torch.no_grad() + @wrap_with_nvtx_name("dtensor_policy_worker_v2/offload_before_refit") + def offload_before_refit(self) -> None: + """Offload the optimizer to the CPU.""" + torch.randn(1).cuda() # wake up torch allocator + if self.optimizer is not None: + self.move_optimizer_to_device("cpu") + + gc.collect() + torch.cuda.empty_cache() + + @torch.no_grad() + @wrap_with_nvtx_name("dtensor_policy_worker_v2/offload_after_refit") + def offload_after_refit(self) -> None: + """Offload as much as possible on the CPU.""" + self.model = self.move_to_cpu(self.model) + self.model.eval() + torch.randn(1).cuda() # wake up torch allocator + self.offload_before_refit() # rerun the old offload function + + # Print memory stats after offloading + allocated = torch.cuda.memory_allocated() / (1024**3) # Convert to GB + reserved = torch.cuda.memory_reserved() / (1024**3) # Convert to GB + print( + f"GPU Memory after optimizer offload: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved" + ) + + def move_optimizer_to_device(self, device: str | torch.device) -> None: + for state in self.optimizer.state.values(): + for k, v in state.items(): + if isinstance(v, (DTensor, torch.Tensor)): + state[k] = v.to(device) + + def move_to_device(self, model: nn.Module, device: str | torch.device) -> nn.Module: + model = self.move_buffer_to_device(model, device) + return model.to(device) + + def move_buffer_to_device( + self, model: nn.Module, device: str | torch.device + ) -> nn.Module: + # FSDP modules do not move buffers to the device automatically + for v in model.buffers(): + v.data = v.data.to(device) + + return model + + def move_to_cuda(self, model: torch.nn.Module) -> torch.nn.Module: + model = self.move_to_device(model, "cuda") + gc.collect() + torch.cuda.empty_cache() + return model + + def move_to_cpu(self, model: torch.nn.Module) -> torch.nn.Module: + model = self.move_to_device(model, "cpu") + gc.collect() + 
torch.cuda.empty_cache() + return model + + def save_checkpoint( + self, + weights_path: str, + optimizer_path: Optional[str] = None, + tokenizer_path: Optional[str] = None, + checkpointing_cfg: Optional[CheckpointingConfig] = None, + ) -> None: + """Save a checkpoint of the model. + + the optimizer states are saved only if `optimizer` and `optimizer_path` are provided. + """ + if checkpointing_cfg is None: + raise ValueError( + "checkpointing_cfg must be provided when saving checkpoint" + ) + + # Extract only the checkpointing configuration keys that exist + checkpoint_kwargs = { + key: value + for key, value in checkpointing_cfg.items() + if key + in { + "model_save_format", + "save_consolidated", + "is_peft", + "peft_config", + } + } + + save_checkpoint( + model=self.model, + weights_path=weights_path, + optimizer=self.optimizer if optimizer_path else None, + scheduler=self.scheduler if optimizer_path else None, + optimizer_path=optimizer_path, + tokenizer=self.tokenizer if tokenizer_path else None, + tokenizer_path=tokenizer_path, + model_state_dict_keys=self.model_state_dict_keys, + **checkpoint_kwargs, + ) + + def load_checkpoint( + self, + weights_path: str, + optimizer_path: Optional[str] = None, + ) -> None: + """Load a checkpoint into the model.""" + load_checkpoint( + model=self.model, + weights_path=weights_path, + optimizer=self.optimizer if optimizer_path else None, + scheduler=self.scheduler if optimizer_path else None, + optimizer_path=optimizer_path, + ) + + def shutdown(self) -> None: + """Shutdown the policy.""" + # Clean up extension resources like ZMQ sockets + if hasattr(self, "zmq_socket"): + self.zmq_socket.close() + self.zmq_context.term() + + def start_gpu_profiling(self) -> None: + """Start GPU profiling.""" + torch.cuda.profiler.start() + + def stop_gpu_profiling(self) -> None: + """Stop GPU profiling.""" + torch.cuda.profiler.stop() + + def report_node_ip_and_gpu_id(self) -> list[tuple[str, int]]: + """Report the node IP and GPU ID of 
the current worker.""" + ip = ray._private.services.get_node_ip_address() + gpu_id = ray.get_gpu_ids()[0] + return (ip, gpu_id) diff --git a/nemo_rl/models/policy/interfaces.py b/nemo_rl/models/policy/interfaces.py index 96b44957ff..e221621403 100644 --- a/nemo_rl/models/policy/interfaces.py +++ b/nemo_rl/models/policy/interfaces.py @@ -34,6 +34,19 @@ class ReferenceLogprobOutputSpec(TypedDict): reference_logprobs: torch.Tensor +class ScoreOutputSpec(TypedDict): + """scores: Tensor of scores.""" + + scores: torch.Tensor + + +class TopkLogitsOutputSpec(TypedDict): + """Per-position top-k logits and corresponding global token indices.""" + + topk_logits: torch.Tensor + topk_indices: torch.Tensor + + class PolicyInterface(ABC): """Abstract base class defining the interface for RL policies.""" @@ -67,6 +80,20 @@ def get_reference_policy_logprobs( """ pass + @abstractmethod + def get_topk_logits( + self, + data: BatchedDataDict[GenerationDatumSpec], + k: int, + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[TopkLogitsOutputSpec]: + """Get per-position top-k logits and global indices for a batch of inputs. + + Notes: + - Aligns to next-token positions → returns S-1 positions. 
+ """ + pass + @abstractmethod def train( self, @@ -87,6 +114,13 @@ def train( """ pass + @abstractmethod + def score( + self, data: BatchedDataDict[GenerationDatumSpec] + ) -> BatchedDataDict[ScoreOutputSpec]: + """Score a batch of data using the policy.""" + pass + @abstractmethod def prepare_for_training(self, *args: Any, **kwargs: Any) -> None: pass @@ -107,7 +141,7 @@ def shutdown(self) -> bool: class ColocatablePolicyInterface(PolicyInterface): @abstractmethod def init_collective( - self, ip: str, port: int, world_size: int + self, ip: str, port: int, world_size: int, *, train_world_size: int ) -> list[ray.ObjectRef]: pass @@ -124,11 +158,9 @@ def prepare_refit_info(self) -> Optional[dict[str, Any]]: pass @abstractmethod - def prepare_weights_for_ipc(self, *args: Any, **kwargs: Any) -> list[list[str]]: - pass - - @abstractmethod - def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: + def stream_weights_via_ipc_zmq( + self, *args: Any, **kwargs: Any + ) -> list[ray.ObjectRef]: pass @abstractmethod diff --git a/nemo_rl/models/policy/lm_policy.py b/nemo_rl/models/policy/lm_policy.py index ebc608e35d..c1fde9bcf5 100644 --- a/nemo_rl/models/policy/lm_policy.py +++ b/nemo_rl/models/policy/lm_policy.py @@ -11,14 +11,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import math import os +import warnings from collections import defaultdict from typing import Any, Optional, Union import numpy as np import ray +import torch from ray.util.queue import Queue as RayQueue -from transformers import PreTrainedTokenizerBase +from transformers import AutoProcessor, PreTrainedTokenizerBase from nemo_rl.algorithms.interfaces import LossFunction from nemo_rl.distributed.batched_data_dict import ( @@ -40,6 +43,14 @@ ColocatablePolicyInterface, LogprobOutputSpec, ReferenceLogprobOutputSpec, + ScoreOutputSpec, + TopkLogitsOutputSpec, +) +from nemo_rl.utils.checkpoint import CheckpointingConfig +from nemo_rl.utils.flops_tracker import ( + FLOPTracker, + get_default_hf_config, + get_theoretical_tflops, ) PathLike = Union[str, "os.PathLike[Any]"] @@ -57,6 +68,7 @@ def __init__( weights_path: Optional[PathLike] = None, optimizer_path: Optional[PathLike] = None, init_reference_model: bool = True, + processor: Optional[AutoProcessor] = None, ): if weights_path: weights_path = os.path.abspath(weights_path) @@ -68,7 +80,13 @@ def __init__( pp_size = 1 cp_size = 1 - megatron_enable = config.get("megatron_cfg", {}).get("enabled", False) + megatron_enable = bool(config.get("megatron_cfg", {}).get("enabled", False)) + dtensor_enable = bool(config.get("dtensor_cfg", {}).get("enabled", False)) + if megatron_enable and dtensor_enable: + raise ValueError( + "Configure either Megatron (policy.megatron_cfg.enabled=true) or " + "DTensor (policy.dtensor_cfg.enabled=true), not both." + ) if megatron_enable: worker_builder_cls = ( "nemo_rl.models.policy.megatron_policy_worker.MegatronPolicyWorker" @@ -78,19 +96,55 @@ def __init__( cp_size = config["megatron_cfg"]["context_parallel_size"] env_vars = config["megatron_cfg"].get("env_vars", {}) + + if "TORCH_CUDA_ARCH_LIST" not in os.environ: + raise RuntimeError( + "TORCH_CUDA_ARCH_LIST is not set. This is required in Megatron backend. 
This variable is set in our container, but " + "if you are running a custom container or baremetal, you may need to set this variable manually. Example: export TORCH_CUDA_ARCH_LIST='9.0 10.0'" + ) + else: - assert config["dtensor_cfg"]["enabled"], ( - "Please either set policy.megatron_cfg.enabled=true to use Megatron training backend " - "or set policy.dtensor_cfg.enabled=true to use DTensor training backend." - ) - worker_builder_cls = ( - "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker" - ) + if not dtensor_enable: + raise ValueError( + "Please either set policy.megatron_cfg.enabled=true to use Megatron training backend " + "or set policy.dtensor_cfg.enabled=true to use DTensor training backend." + ) + + # Check if _v2 is enabled in dtensor_cfg (defaults to False for backward compatibility) + use_v2 = config.get("dtensor_cfg", {}).get("_v2", False) + if use_v2: + worker_builder_cls = "nemo_rl.models.policy.dtensor_policy_worker_v2.DTensorPolicyWorkerV2" + else: + worker_builder_cls = ( + "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker" + ) + tp_size = config["dtensor_cfg"]["tensor_parallel_size"] cp_size = config["dtensor_cfg"]["context_parallel_size"] env_vars = config["dtensor_cfg"].get("env_vars", {}) + # Validate world_size compatibility with parallelism configuration + model_parallel_size = pp_size * cp_size * tp_size + actual_world_size = cluster.world_size() + + if actual_world_size < model_parallel_size: + raise ValueError( + f"World size ({actual_world_size}) is insufficient for the parallelism configuration. " + f"Required minimum world size: PP({pp_size}) * CP({cp_size}) * TP({tp_size}) = {model_parallel_size}. " + f"This would result in DP = {actual_world_size}/{model_parallel_size} = {actual_world_size / model_parallel_size:.3f}, but DP must be ≥ 1. " + f"Please either increase the number of GPUs/nodes or reduce the parallelism parameters." 
+ ) + + if actual_world_size % model_parallel_size != 0: + dp_size_float = actual_world_size / model_parallel_size + raise ValueError( + f"World size ({actual_world_size}) must be divisible by PP * CP * TP ({model_parallel_size}). " + f"The data parallel size (DP = world_size / (PP * CP * TP)) must be a positive integer. " + f"Current DP would be {actual_world_size}/{model_parallel_size} = {dp_size_float:.6f}, which is not an integer. " + f"Please adjust your cluster size or parallelism parameters." + ) + self.sharding_annotations = NamedSharding( layout=np.arange(cluster.world_size()).reshape( pp_size, # PP @@ -111,6 +165,7 @@ def __init__( worker_builder_cls, config, tokenizer=tokenizer, + processor=processor, init_optimizer=init_optimizer, weights_path=weights_path, optimizer_path=optimizer_path, @@ -119,14 +174,33 @@ def __init__( pre_init_communication_queue=pre_init_queue, ) - self.worker_group = RayWorkerGroup( - cluster, - worker_builder, - name_prefix=name_prefix, - workers_per_node=workers_per_node, - sharding_annotations=self.sharding_annotations, - env_vars=env_vars, - ) + if cluster._sorted_bundle_indices is not None: + # The cluster has initialized a unified placemenet group across nodes + # In this case, we need to create workers based on sorted bundle indices + group_size = cluster.num_gpus_per_node + tied_groups = [ + (i // group_size, [bundle_idx]) + for i, bundle_idx in enumerate(cluster._sorted_bundle_indices) + ] + + self.worker_group = RayWorkerGroup( + cluster, + worker_builder, + name_prefix=name_prefix, + bundle_indices_list=tied_groups, + sharding_annotations=self.sharding_annotations, + env_vars=env_vars or {}, + ) + + else: + self.worker_group = RayWorkerGroup( + cluster, + worker_builder, + name_prefix=name_prefix, + workers_per_node=workers_per_node, + sharding_annotations=self.sharding_annotations, + env_vars=env_vars or {}, + ) if config["dynamic_batching"]["enabled"]: assert pp_size == 1, ( @@ -147,19 +221,35 @@ def __init__( else: 
self.use_dynamic_batches = False + # initialize FLOPs tracker + try: + self.flops_tracker = FLOPTracker.from_config( + config["model_name"], get_default_hf_config(config["model_name"]) + ) + except ValueError as e: + self.flops_tracker = None + print(f"FLOPS tracker not supported for model {config['model_name']}: {e}") + if config["sequence_packing"]["enabled"]: self.use_sequence_packing = True + sequence_length_pad_multiple = ( + cp_size * 2 * tp_size if cp_size > 1 else tp_size + ) + if ( + config["megatron_cfg"]["enabled"] + and config["megatron_cfg"].get("fp8_cfg", None) is not None + and config["megatron_cfg"]["fp8_cfg"].get("enabled", False) + ): + # if fp8 is enabled, ensure the sequence is padded to multiples of 16 + # Ref: https://github.com/NVIDIA/TransformerEngine/blob/5b3092a0e40654436bec5ea0a0b0f7ad2887b20d/transformer_engine/pytorch/utils.py#L437-L441 + sequence_length_pad_multiple = math.lcm( + 16, sequence_length_pad_multiple + ) self.sequence_packing_args: SequencePackingArgs = { - "train_mb_tokens": config["sequence_packing"]["train_mb_tokens"], - "logprob_mb_tokens": config["sequence_packing"].get( - "logprob_mb_tokens", None - ), "algorithm": config["sequence_packing"]["algorithm"], "input_key": "input_ids", "input_lengths_key": "input_lengths", - "sequence_length_pad_multiple": (cp_size * 2 * tp_size) - if cp_size > 1 - else tp_size, + "sequence_length_pad_multiple": sequence_length_pad_multiple, } assert not config["dynamic_batching"]["enabled"], ( "Sequence Packing is exclusive of Dynamic Batching. 
Please disable Dynamic Batching" @@ -170,11 +260,15 @@ def __init__( self.cfg = config def init_collective( - self, ip: str, port: int, world_size: int + self, ip: str, port: int, world_size: int, *, train_world_size: int ) -> list[ray.ObjectRef]: """Initialize the collective communication.""" futures = self.worker_group.run_all_workers_single_data( - "init_collective", ip=ip, port=port, world_size=world_size + "init_collective", + ip=ip, + port=port, + world_size=world_size, + train_world_size=train_world_size, ) # this function should co-work with vllm, so we should wait for all futures to complete outside return futures @@ -309,6 +403,72 @@ def get_reference_policy_logprobs( return logprobs + def get_topk_logits( + self, + data: BatchedDataDict[GenerationDatumSpec], + k: int, + micro_batch_size: Optional[int] = None, + ) -> BatchedDataDict[TopkLogitsOutputSpec]: + """Dispatch get_topk_logits to workers (no CP/packed support initially).""" + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + sharded_data: list[SlicedDataDict] + unsorted_data_indices: list[int] + if self.use_dynamic_batches: + self.dynamic_batching_args["max_tokens_per_microbatch"] = self.cfg[ + "dynamic_batching" + ]["logprob_mb_tokens"] + sharded_data, unsorted_data_indices = data.shard_by_batch_size( # type: ignore + dp_size, + batch_size=None, + dynamic_batching_args=self.dynamic_batching_args, + ) + elif self.use_sequence_packing: + self.sequence_packing_args["max_tokens_per_microbatch"] = self.cfg[ + "sequence_packing" + ]["logprob_mb_tokens"] + # we just shard into DP shards here as Sequence packing allows for CP. 
+ sharded_data, unsorted_data_indices = data.shard_by_batch_size( + dp_size, + batch_size=None, + sequence_packing_args=self.sequence_packing_args, + ) + else: + sharded_data = data.shard_by_batch_size( # type: ignore + dp_size, + batch_size=None, + ) + + futures = self.worker_group.run_all_workers_sharded_data( + "get_topk_logits", + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + common_kwargs={"k": k, "micro_batch_size": micro_batch_size}, + ) + + # Avoid BatchedDataDict.from_batches here because it flattens rows for tensors with ndim>2 ([B,S,k] -> [B,S*k]). + worker_batches = self.worker_group.get_all_worker_results(futures) + all_topk_logits = [wb["topk_logits"] for wb in worker_batches] + all_topk_indices = [wb["topk_indices"] for wb in worker_batches] + + stacked: BatchedDataDict[TopkLogitsOutputSpec] = BatchedDataDict() + stacked["topk_logits"] = torch.cat(all_topk_logits, dim=0) + stacked["topk_indices"] = torch.cat(all_topk_indices, dim=0) + + if self.use_dynamic_batches or self.use_sequence_packing: + stacked.reorder_data(unsorted_data_indices) + + return stacked + def train( self, data: BatchedDataDict[Any], @@ -346,6 +506,12 @@ def train( batch_size=batch_size, ) + if self.flops_tracker is not None: + self.flops_tracker.reset() + for shard in sharded_data: + input_lengths = shard["input_lengths"] + self.flops_tracker.track_batch(input_lengths.tolist()) + # Train each shard in parallel futures = self.worker_group.run_all_workers_sharded_data( "train", @@ -376,6 +542,19 @@ def train( "grad_norm": results[0]["grad_norm"], } + if self.flops_tracker is not None: + aggregated_results["total_flops"] = self.flops_tracker.total_flops + aggregated_results["num_ranks"] = self.worker_group.cluster.world_size() + gpus_per_worker = self.worker_group.cluster.world_size() 
/ len(results) + + try: + aggregated_results["theoretical_tflops"] = gpus_per_worker * sum( + get_theoretical_tflops(r["gpu_name"], r["model_dtype"]) + for r in results + ) + except Exception as e: + warnings.warn(f"Error getting theoretical flops: {e}") + # Aggregate metrics across all workers all_mb_metrics = defaultdict(list) for r in results: @@ -410,7 +589,7 @@ def generate( assert self.cfg["generation"] is not None, "Generation config is not set" result: BatchedDataDict[GenerationOutputSpec] = BatchedDataDict.from_batches( self.worker_group.get_all_worker_results(futures), - pad_value_dict={"output_ids": self.cfg["generation"]["pad_token_id"]}, + pad_value_dict={"output_ids": self.cfg["generation"]["_pad_token_id"]}, ) # Verify the output has all required fields @@ -428,6 +607,51 @@ def generate( return result + def score( + self, data: BatchedDataDict[GenerationDatumSpec] + ) -> BatchedDataDict[ScoreOutputSpec]: + """Score a batch of data using the policy.""" + # Verify input data is right-padded + assert isinstance(data, BatchedDataDict), ( + f"data must be a BatchedDataDict, got type: {type(data)}" + ) + assert "input_ids" in data and "input_lengths" in data, ( + "Missing required input fields" + ) + + dp_size = self.sharding_annotations.get_axis_size("data_parallel") + sharded_data = data.shard_by_batch_size(dp_size, batch_size=None) + futures = self.worker_group.run_all_workers_sharded_data( + "score", + data=sharded_data, + in_sharded_axes=["data_parallel"], + replicate_on_axes=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + output_is_replicated=[ + "context_parallel", + "tensor_parallel", + "pipeline_parallel", + ], + common_kwargs={}, + ) + + result: BatchedDataDict[ScoreOutputSpec] = BatchedDataDict.from_batches( + self.worker_group.get_all_worker_results(futures), + ) + required_keys = [ + "scores", + ] + missing_keys = [key for key in required_keys if key not in result] + if missing_keys: + raise ValueError( + f"Missing 
required keys for ScoreOutputSpec: {missing_keys}" + ) + + return result + def prepare_for_generation(self, *args: Any, **kwargs: Any) -> bool: # We don't need to do anything here return True @@ -447,6 +671,10 @@ def finish_generation(self, *args: Any, **kwargs: Any) -> bool: # We don't need to do anything here return True + def invalidate_kv_cache(self, *args: Any, **kwargs: Any) -> bool: + # We don't need to do anything here + return True + def finish_training(self, *args: Any, **kwargs: Any) -> None: # Placeholder implementation pass @@ -462,69 +690,19 @@ def prepare_refit_info(self) -> Optional[dict[str, Any]]: # Only get the first worker's info since all workers will have the same result return results[0] - def prepare_weights_for_ipc( - self, _refit_buffer_size_gb: Optional[int] = None - ) -> list[list[str]]: - """Prepare the weights for IPC. + def get_free_memory_bytes(self) -> int: + """Get the available free memory.""" + futures = self.worker_group.run_all_workers_single_data("get_free_memory_bytes") + # minimum free memory from all workers for safety + free_memory_bytes = min(ray.get(future) for future in futures) + return free_memory_bytes - Returns: - list: A list containing the keys of the parameters, which is grouped by size. 
- """ - # Get the state_dict_info and available memory from all workers + def stream_weights_via_ipc_zmq(self, buffer_size_bytes: int) -> list[ray.ObjectRef]: + """Send the weights for IPC handles via ZMQ socket.""" futures = self.worker_group.run_all_workers_single_data( - "prepare_weights_for_ipc" - ) - results = ray.get(futures) - - # Only get the first worker's state_dict_info since all workers will have the same result - state_dict_info = results[0][0] - - if _refit_buffer_size_gb is not None: - total_available_bytes = _refit_buffer_size_gb * (1024**3) - else: - # Get the minimum available memory from all workers - total_available_bytes = min(result[1] for result in results) - - # Group tensors by size - cur_available_bytes = total_available_bytes - grouped_param_keys: list[list[str]] = [] - keys: list[str] = [] - - for key, size_in_bytes in state_dict_info: - if size_in_bytes > cur_available_bytes: - if keys: - grouped_param_keys.append(keys) - keys = [] - cur_available_bytes = total_available_bytes - - keys.append(key) - cur_available_bytes -= size_in_bytes - - if keys: - grouped_param_keys.append(keys) - - return grouped_param_keys - - def get_weights_ipc_handles(self, keys: list[str]) -> dict[str, Any]: - """Fetch weight IPC handles from all workers. - - Returns: - dict: A dictionary mapping device UUIDs to parameter IPC handles. 
- """ - # Collect IPC handles from all workers - worker_handles: list[dict[str, Any]] = ray.get( - [ - worker.get_weights_ipc_handles.remote(keys=keys) - for worker in self.worker_group.workers - ] + "stream_weights_via_ipc_zmq", buffer_size_bytes=buffer_size_bytes ) - - # Combine all worker handles into a single dictionary - all_handles = {} - for handle in worker_handles: - all_handles.update(handle) - - return all_handles + return futures def broadcast_weights_for_collective(self) -> list[ray.ObjectRef]: """Broadcast the weights for collective communication.""" @@ -549,14 +727,34 @@ def save_checkpoint( weights_path: str, optimizer_path: Optional[str] = None, tokenizer_path: Optional[str] = None, + checkpointing_cfg: Optional[CheckpointingConfig] = None, ) -> None: """Save a checkpoint of the model.""" - futures = self.worker_group.run_all_workers_single_data( - "save_checkpoint", - weights_path=weights_path, - optimizer_path=optimizer_path, - tokenizer_path=tokenizer_path, - ) + # Only pass checkpointing_cfg for DTensor v2 + use_v2 = self.cfg.get("dtensor_cfg", {}).get("_v2", False) + + if use_v2: + futures = self.worker_group.run_all_workers_single_data( + "save_checkpoint", + weights_path=weights_path, + optimizer_path=optimizer_path, + tokenizer_path=tokenizer_path, + checkpointing_cfg=checkpointing_cfg, + ) + else: + if ( + checkpointing_cfg is not None + and checkpointing_cfg.get("model_save_format", None) is not None + ): + raise ValueError( + "model_save_format must be None or omitted if using DTensorPolicyWorker (_v2=False)." + ) + futures = self.worker_group.run_all_workers_single_data( + "save_checkpoint", + weights_path=weights_path, + optimizer_path=optimizer_path, + tokenizer_path=tokenizer_path, + ) ray.get(futures) def shutdown(self) -> bool: @@ -575,7 +773,8 @@ def __del__(self) -> None: the object is lost due to leaving a function scope. It's always recommended that the user calls worker_group.shutdown(). 
""" - self.worker_group.shutdown() + if hasattr(self, "worker_group"): + self.worker_group.shutdown(cleanup_method="shutdown") def start_gpu_profiling(self) -> None: """Start GPU profiling.""" @@ -586,3 +785,36 @@ def stop_gpu_profiling(self) -> None: """Stop GPU profiling.""" futures = self.worker_group.run_all_workers_single_data("stop_gpu_profiling") ray.get(futures) + + def print_node_ip_and_gpu_id(self) -> list[tuple[str, int]]: + """Print the node IP and GPU ID of the current worker.""" + results = ray.get( + self.worker_group.run_all_workers_single_data( + "report_node_ip_and_gpu_id", + ) + ) + all_node_ips = sorted(set([result[0] for result in results])) + all_gpu_ids = sorted(set([result[1] for result in results])) + + worker_id_list = [ + [list() for _ in range(len(all_gpu_ids))] for _ in range(len(all_node_ips)) + ] + for worker_id, (ip, gpu_id) in enumerate(results): + node_idx = all_node_ips.index(ip) + gpu_idx = all_gpu_ids.index(gpu_id) + worker_id_list[node_idx][gpu_idx].append("worker-" + str(worker_id)) + + from prettytable import PrettyTable + + table = PrettyTable() + table.title = "Policy worker mapping to Nodes and GPUs" + table.field_names = ["Node_IP"] + [ + "GPU_ID=" + str(gpu_id) for gpu_id in all_gpu_ids + ] + for i, node_idx in enumerate(all_node_ips): + row = [node_idx] + for j in range(len(all_gpu_ids)): + row.append(tuple(worker_id_list[i][j])) + table.add_row(row) + + print(table) diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 9793ea8e9c..c507b43acb 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import gc +import math import os import time import warnings @@ -22,9 +23,47 @@ import ray import torch +import zmq +from megatron.bridge import AutoBridge +from megatron.bridge.models.model_provider import get_model +from megatron.bridge.training import fault_tolerance +from megatron.bridge.training.checkpointing import ( + checkpoint_exists, + init_checkpointing_context, + load_checkpoint, + maybe_finalize_async_save, + save_checkpoint, +) +from megatron.bridge.training.config import ( + CheckpointConfig, + ConfigContainer, + DistributedDataParallelConfig, + LoggerConfig, + OptimizerConfig, + SchedulerConfig, + TokenizerConfig, + TrainingConfig, +) +from megatron.bridge.training.initialize import ( + initialize_megatron, + set_jit_fusion_options, +) +from megatron.bridge.training.optim import setup_optimizer +from megatron.bridge.training.setup import ( + _update_model_config_funcs, +) +from megatron.bridge.training.state import GlobalState +from megatron.bridge.training.tokenizers.tokenizer import build_tokenizer +from megatron.bridge.training.utils.train_utils import ( + logical_and_across_model_parallel_group, + reduce_max_stat_across_model_parallel_group, +) +from megatron.bridge.utils.common_utils import get_rank_safe +from megatron.bridge.utils.instantiate_utils import InstantiationMode +from megatron.bridge.utils.vocab_utils import calculate_padded_vocab_size from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel -from megatron.core.distributed.custom_fsdp import ( +from megatron.core.distributed.fsdp.mcore_fsdp_adapter import ( FullyShardedDataParallel as custom_FSDP, ) from megatron.core.inference.engines import ( @@ -36,6 +75,9 @@ from megatron.core.inference.text_generation_controllers.text_generation_controller import ( TextGenerationController, ) +from megatron.core.inference.text_generation_server.run_mcore_engine import ( + run_mcore_engine, +) from megatron.core.models.gpt import GPTModel from 
megatron.core.optimizer import ChainedOptimizer from megatron.core.parallel_state import ( @@ -51,44 +93,17 @@ ) from megatron.core.pipeline_parallel import get_forward_backward_func from megatron.core.rerun_state_machine import get_rerun_state_machine -from megatron.inference.text_generation.mcore_engine_server import ( - run_mcore_engine, -) +from megatron.core.transformer.module import Float16Module +from megatron.core.transformer.transformer_config import TransformerConfig from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron import fault_tolerance -from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint, save_checkpoint -from nemo.tron.config import ( - CheckpointConfig, - ConfigContainer, - DistributedDataParallelConfig, - LoggerConfig, - OptimizerConfig, - SchedulerConfig, - TokenizerConfig, - TrainingConfig, -) -from nemo.tron.init import initialize_megatron, set_jit_fusion_options -from nemo.tron.model import get_model_from_config -from nemo.tron.optim import setup_optimizer -from nemo.tron.setup import ( - HAVE_FSDP2, - _init_checkpointing_context, - _update_model_config_funcs, -) -from nemo.tron.state import GlobalState -from nemo.tron.tokenizers.tokenizer import build_tokenizer -from nemo.tron.utils.async_utils import maybe_finalize_async_save -from nemo.tron.utils.common_utils import get_rank_safe -from nemo.tron.utils.train_utils import ( - logical_and_across_model_parallel_group, - reduce_max_stat_across_model_parallel_group, -) from ray.util.queue import Queue from transformers import PreTrainedTokenizerBase from nemo_rl.algorithms.interfaces import LossFunction, LossType from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.model_utils import ( + allgather_cp_sharded_tensor, + distributed_vocab_topk, from_parallel_logits_to_logprobs, from_parallel_logits_to_logprobs_packed_sequences, ) @@ -104,27 +119,85 @@ forward_step_arbitrary_loss, ) from 
nemo_rl.models.megatron.community_import import import_model_from_hf_name -from nemo_rl.models.megatron.converters.common import MegatronToHFConverter -from nemo_rl.models.megatron.refit_utils import ( - gather_params, - get_local_key_to_global_keys, - get_param_info, -) from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import ( LogprobOutputSpec, ReferenceLogprobOutputSpec, ) from nemo_rl.models.policy.utils import ( - configure_expandable_segments, + configure_dynamo_cache, get_gpu_info, get_megatron_checkpoint_dir, get_runtime_env_for_policy_worker, ) +from nemo_rl.utils.nsys import wrap_with_nvtx_name +from nemo_rl.utils.packed_tensor import packed_broadcast_producer + +try: + from megatron.core.distributed import ( + TorchFullyShardedDataParallel as torch_FSDP, # noqa: F401 unused-import + ) + + HAVE_FSDP2 = True +except ImportError: + HAVE_FSDP2 = False TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) +def broadcast_object_across_pp_ranks(obj): + """Broadcast an object across pipeline parallel ranks. + + This utility function handles broadcasting an object from the rank that owns it + to all other pipeline parallel ranks. If only one rank has the object (non-None), + it will be broadcast to all other ranks. + + Args: + obj: The object to broadcast. Can be None on ranks that don't own it. + + Returns: + The object on all ranks (either the original or the broadcast copy). + + Raises: + ValueError: If the object doesn't exist on any pipeline parallel rank. + """ + pp_size = get_pipeline_model_parallel_world_size() + pp_group = get_pipeline_model_parallel_group() + + if pp_size == 1: + return obj + + # ------------------------------------------------------------------ + # 1. 
Gather presence flags from all PP ranks to find the source rank + # ------------------------------------------------------------------ + has_obj = obj is not None + obj_flags = [None] * pp_size + torch.distributed.all_gather_object(obj_flags, has_obj, group=pp_group) + + # ------------------------------------------------------------------ + # 2. Identify the owning rank (the only rank with True flag) + # ------------------------------------------------------------------ + src_rank = None # Rank *inside* the PP group + for rank, flag in enumerate(obj_flags): + if flag: + src_rank = rank + break + + if src_rank is None: + raise ValueError("Object must exist on at least one PP rank") + + # ------------------------------------------------------------------ + # 3. Broadcast the object from the source rank to all ranks + # ------------------------------------------------------------------ + # Use broadcast_object_list which is more robust than all_gather_object + obj_list = [obj] + pp_ranks = torch.distributed.get_process_group_ranks(pp_group) + global_src = pp_ranks[src_rank] + torch.distributed.broadcast_object_list(obj_list, src=global_src, group=pp_group) + + return obj_list[0] + + def setup_megatron_model( policy_cfg: PolicyConfig, cfg: ConfigContainer, @@ -136,19 +209,19 @@ def setup_megatron_model( state.cfg = cfg # TODO: Freeze state.cfg + cfg.dist.external_gpu_device_mapping = True initialize_megatron( cfg=cfg, get_embedding_ranks=get_embedding_ranks, get_position_embedding_ranks=get_position_embedding_ranks, - gpu_visibility_externally_set=True, ) - if cfg.ft_config and cfg.ft_config.enable_ft_package: + if cfg.ft and cfg.ft.enable_ft_package: fault_tolerance.setup(cfg, state) - fault_tolerance.maybe_setup_simulated_fault(cfg.ft_config) + fault_tolerance.maybe_setup_simulated_fault(cfg.ft) # Set pytorch JIT layer fusion options and warmup JIT functions. 
- set_jit_fusion_options(cfg.model_config, cfg.train_config.micro_batch_size) + set_jit_fusion_options(cfg.model, cfg.train.micro_batch_size) # Adjust the startup time so it reflects the largest value. # This will be closer to what scheduler will see (outside of @@ -167,45 +240,61 @@ def setup_megatron_model( torch.distributed.barrier() # Context used for persisting some state between checkpoint saves. - checkpointing_context = _init_checkpointing_context(cfg.checkpoint_config) + checkpointing_context = init_checkpointing_context(cfg.checkpoint) # Tokenizer build_tokenizer( - cfg.tokenizer_config, - make_vocab_size_divisible_by=cfg.model_config.make_vocab_size_divisible_by - // cfg.model_config.tensor_model_parallel_size, - tensor_model_parallel_size=cfg.model_config.tensor_model_parallel_size, + cfg.tokenizer, + make_vocab_size_divisible_by=cfg.model.make_vocab_size_divisible_by + // cfg.model.tensor_model_parallel_size, + tensor_model_parallel_size=cfg.model.tensor_model_parallel_size, + trust_remote_code=True, ) - if not cfg.model_config.vocab_size: - cfg.model_config.vocab_size = cfg.tokenizer_config.padded_vocab_size + assert cfg.model.vocab_size, "vocab size must be specified in model config" torch.distributed.barrier() - model_post_init_fns = [] + pre_wrap_hook = [] + mixed_precision_wrapper = Float16Module if policy_cfg["megatron_cfg"]["freeze_moe_router"]: - def freeze_moe_router(model_module): - for layer in model_module.decoder.layers: - if hasattr(layer.mlp, "router"): - layer.mlp.router.weight.requires_grad = False - - model_post_init_fns.append(freeze_moe_router) + def freeze_moe_router(megatron_model): + if not isinstance(megatron_model, list): + megatron_model = [megatron_model] + for model_module in megatron_model: + # Handle both wrapped (Float16Module) and unwrapped models + if isinstance(model_module, Float16Module): + model_module = model_module.module + # Handle VLM models + if hasattr(model_module, "language_model"): + model_module = 
model_module.language_model + for layer in model_module.decoder.layers: + if hasattr(layer, "mlp") and hasattr(layer.mlp, "router"): + layer.mlp.router.weight.requires_grad = False + + mixed_precision_wrapper = CustomFloat16Module + pre_wrap_hook.extend([freeze_moe_router]) + + # If deferring fp32 logits, disable mixed-precision wrapper entirely + if policy_cfg["megatron_cfg"].get("defer_fp32_logits", None): + mixed_precision_wrapper = None # Model, optimizer, and learning rate. - model = get_model_from_config( - cfg.model_config, - cfg.ddp_config, - use_torch_fsdp2=cfg.dist_config.use_torch_fsdp2, - overlap_param_gather_with_optimizer_step=cfg.optimizer_config.overlap_param_gather_with_optimizer_step, - data_parallel_random_init=cfg.rng_config.data_parallel_random_init, - model_post_init_fns=model_post_init_fns, + model = get_model( + cfg.model, + cfg.ddp, + use_torch_fsdp2=cfg.dist.use_torch_fsdp2, + overlap_param_gather_with_optimizer_step=cfg.optimizer.overlap_param_gather_with_optimizer_step, + data_parallel_random_init=cfg.rng.data_parallel_random_init, + pre_wrap_hook=pre_wrap_hook, + mixed_precision_wrapper=mixed_precision_wrapper, ) if load_optimizer: optimizer, scheduler = setup_optimizer( - optimizer_config=cfg.optimizer_config, - scheduler_config=cfg.scheduler_config, + optimizer_config=cfg.optimizer, + scheduler_config=cfg.scheduler, model=model, - use_gloo_process_groups=cfg.dist_config.use_gloo_process_groups, + use_gloo_process_groups=cfg.dist.use_gloo_process_groups, ) else: optimizer = None @@ -216,11 +305,11 @@ def freeze_moe_router(model_module): # Load checkpoint if applicable if ( - cfg.checkpoint_config.load is not None - or cfg.checkpoint_config.pretrained_checkpoint is not None + cfg.checkpoint.load is not None + or cfg.checkpoint.pretrained_checkpoint is not None ) and ( - checkpoint_exists(cfg.checkpoint_config.load) - or checkpoint_exists(cfg.checkpoint_config.pretrained_checkpoint) + checkpoint_exists(cfg.checkpoint.load) + or 
checkpoint_exists(cfg.checkpoint.pretrained_checkpoint) ): load_checkpoint( state, @@ -228,7 +317,7 @@ def freeze_moe_router(model_module): optimizer, scheduler, checkpointing_context=checkpointing_context, - skip_load_to_model_and_opt=HAVE_FSDP2 and cfg.dist_config.use_torch_fsdp2, + skip_load_to_model_and_opt=HAVE_FSDP2 and cfg.dist.use_torch_fsdp2, ) print("Checkpoint loaded") torch.distributed.barrier() @@ -259,7 +348,9 @@ def destroy_parallel_state(): # Reset async calls queue to prevent call_idx mismatches after distributed context recreation try: import nemo.tron.utils.async_utils as nemo_async_utils - from nemo.tron.utils.async_utils import AsyncCallsQueue + from megatron.core.dist_checkpointing.strategies.async_utils import ( + AsyncCallsQueue, + ) # Clean up any existing async callers first old_call_idx = getattr(nemo_async_utils._async_calls_queue, "call_idx", None) @@ -361,6 +452,15 @@ def __init__( pre_init_communication_queue: Queue, **kwargs: Any, ): + self.is_generation_colocated = None + if "generation" in config and config["generation"] is not None: + self.is_generation_colocated = config["generation"]["colocated"]["enabled"] + + # Explicitly set NCCL_CUMEM_ENABLE to 1 to avoid the P2P initialization error for PyNCCLCommunicator. + # See https://github.com/NVIDIA-NeMo/RL/issues/564 for more details. + if not self.is_generation_colocated: + os.environ["NCCL_CUMEM_ENABLE"] = "1" + self.cfg = config dtype_map = { "float32": torch.float32, @@ -369,8 +469,21 @@ def __init__( } self.dtype = dtype_map[self.cfg["precision"]] - # Only enable expandable_segments on Hopper and newer architectures (compute capability 9.x+) - configure_expandable_segments() + self.optimizer_cpu_offload = self.cfg["megatron_cfg"]["optimizer"][ + "optimizer_cpu_offload" + ] + self.offload_optimizer_for_logprob = self.cfg["offload_optimizer_for_logprob"] + + # Reward models are not yet supported with Megatron. 
+ if "reward_model_cfg" in self.cfg and self.cfg["reward_model_cfg"]["enabled"]: + raise NotImplementedError( + "Reward models are not yet supported with the Megatron backend, this issue is " + "tracked in https://github.com/NVIDIA-NeMo/RL/issues/720" + ) + + # Disable dynamo autotune_local_cache to avoid crash when there's already a cache + # with different order of node_bundles + configure_dynamo_cache() # cfg["model_name"] is allowed to be either an HF model name or a path to an HF checkpoint # check if hf_model_name is a path @@ -388,43 +501,24 @@ def __init__( # Ensure clean slate before import destroy_parallel_state() - if get_rank_safe() == 0: - if pt_checkpoint_exists: - print( - f"Checkpoint already exists at {pretrained_path}. Skipping import." - ) - else: - try: - # Clean environment to prevent conflicts - env_backup = {} - env_vars_to_clean = [ - "MASTER_ADDR", - "MASTER_PORT", - "WORLD_SIZE", - "LOCAL_RANK", - ] - for var in env_vars_to_clean: - if var in os.environ: - env_backup[var] = os.environ[var] - del os.environ[var] - - import_model_from_hf_name(hf_model_name, pretrained_path) - - # Restore environment - for var, val in env_backup.items(): - os.environ[var] = val - - except Exception as e: - print(f"Error importing model: {e}") - raise - finally: - # Force cleanup after import - destroy_parallel_state() - pre_init_communication_queue.put(True) + # Set for rank for non-collocated to check which ranks to broadcast from + self.rank = get_rank_safe() + # Need to initialize the process group before calling into Megatron-Bridge, otherwise Megatron-Bridge will try to set an incorrect device + torch.distributed.init_process_group("nccl") + if pt_checkpoint_exists: + print(f"Checkpoint already exists at {pretrained_path}. 
Skipping import.") else: - pre_init_communication_queue.get() - pre_init_communication_queue.put(True) - destroy_parallel_state() + hf_config_overrides = self.cfg.get("hf_config_overrides", {}) or {} + import_model_from_hf_name( + hf_model_name, + pretrained_path, + self.cfg["megatron_cfg"], + **hf_config_overrides, + ) + + if parallel_state.model_parallel_is_initialized(): + print("Reinitializing model parallel after loading model state.") + parallel_state.destroy_model_parallel() pretrained_run_config = os.path.join( pretrained_path, "iter_0000000/run_config.yaml" @@ -439,9 +533,11 @@ def __init__( f"Pretrained run config not found at {pretrained_run_config} on rank={get_rank_safe()}. This usually means that the one-time HF->mcore conversion on rank=0 saved to a directory not being mounted on this node. Please check " ) - cfg_from_pretrained = ConfigContainer.from_yaml(pretrained_run_config) - model_cfg = cfg_from_pretrained.model_config - cfg_from_pretrained.logger_config = LoggerConfig() + cfg_from_pretrained = ConfigContainer.from_yaml( + pretrained_run_config, mode=InstantiationMode.STRICT + ) + model_cfg = cfg_from_pretrained.model + cfg_from_pretrained.logger = LoggerConfig() model_cfg.tensor_model_parallel_size = self.cfg["megatron_cfg"][ "tensor_model_parallel_size" @@ -487,6 +583,10 @@ def __init__( "moe_router_bias_update_rate" ] + model_cfg.moe_permute_fusion = self.cfg["megatron_cfg"]["moe_permute_fusion"] + if "layernorm_epsilon" in self.cfg["megatron_cfg"]: + model_cfg.layernorm_epsilon = self.cfg["megatron_cfg"]["layernorm_epsilon"] + model_cfg.sequence_parallel = self.cfg["megatron_cfg"]["sequence_parallel"] model_cfg.bf16 = self.dtype == torch.bfloat16 model_cfg.fp16 = self.dtype == torch.float16 @@ -512,6 +612,44 @@ def __init__( "https://github.com/NVIDIA/Megatron-LM/blob/1ab876ddc4c1893c76f26d775226a8d1dcdfb3d2/megatron/core/transformer/mlp.py#L174." 
) model_cfg.apply_rope_fusion = self.cfg["megatron_cfg"]["apply_rope_fusion"] + model_cfg.bias_activation_fusion = self.cfg["megatron_cfg"][ + "bias_activation_fusion" + ] + fp8_cfg = self.cfg["megatron_cfg"].get("fp8_cfg", None) + self.fp8_cfg = fp8_cfg + if fp8_cfg is not None and fp8_cfg.get("enabled", False): + try: + model_cfg.fp8 = fp8_cfg["fp8"] + model_cfg.fp8_recipe = fp8_cfg["fp8_recipe"] + model_cfg.fp8_param = fp8_cfg["fp8_param"] + except KeyError as e: + raise KeyError(f"Missing key in fp8_cfg: {e}") + if model_cfg.fp8_param: + warnings.warn( + "Setting fp8_param=True sometimes causes NaN token_mult_prob_error, please use with caution. " + "Refer to https://github.com/NVIDIA-NeMo/RL/issues/1164 for latest updates with this issue." + ) + + optimizer_cpu_offload = self.cfg["megatron_cfg"]["optimizer"][ + "optimizer_cpu_offload" + ] + optimizer_offload_fraction = self.cfg["megatron_cfg"]["optimizer"][ + "optimizer_offload_fraction" + ] + if optimizer_cpu_offload: + # Currently, hybrid optimizer (partly on GPU and partly on CPU) is not supported because it conflicts with the way + # Nemo-rl handles the optimizer offload/onload between generation and training. So if using CPU optimizer the offload_fraction should be 1.0. + assert optimizer_offload_fraction == 1.0, ( + "Currently for optimizer offloading, only optimizer_offload_fraction=1.0 is supported" + ) + if ( + "logprob_chunk_size" in self.cfg + and self.cfg["logprob_chunk_size"] is not None + and self.cfg["logprob_chunk_size"] > 0 + ): + assert self.cfg["megatron_cfg"]["defer_fp32_logits"], ( + "defer_fp32_logits must be True if logprob_chunk_size is set" + ) checkpoint_config = CheckpointConfig( save_interval=100, @@ -530,19 +668,43 @@ def __init__( fully_parallel_load=True, # Enable fully parallel load load_rng=False, ) + + assert "train_iters" in self.cfg["megatron_cfg"], ( + "train_iters must be set in megatron_cfg. 
For an example, see " + "https://github.com/NVIDIA-NeMo/RL/blob/bccbc377705a81a1f4b3c31ad9767bcc15f735a8/nemo_rl/algorithms/sft.py#L175-L179." + ) + + ## These settings are required for correct gradient computations in mcore + ## when calculate_per_token_loss is True, there is no scaling of the gradient in mcore, + ## so we handle the scaling in nemo-rl. + ## perform_initialization = True is a workaround to ensure the correct tensor parallel attributes are set + ## on the TP-sharded parameters. + model_cfg.calculate_per_token_loss = True + model_cfg.perform_initialization = True + + assert ( + "aux_loss" not in model_cfg.moe_router_load_balancing_type + or model_cfg.moe_aux_loss_coeff == 0 + ), ( + "MoE aux loss is currently not supported due to a known bug in Megatron-LM. " + "See https://github.com/NVIDIA/Megatron-LM/issues/1984 for more details." + ) + self.megatron_cfg = ConfigContainer( - model_config=model_cfg, - checkpoint_config=checkpoint_config, - logger_config=LoggerConfig(logging_level=0), - train_config=TrainingConfig( + model=model_cfg, + checkpoint=checkpoint_config, + logger=LoggerConfig(logging_level=0), + train=TrainingConfig( micro_batch_size=1, # ignored global_batch_size=self.cfg["train_global_batch_size"], # ignored - train_iters=1000, # Default value for inference + train_iters=self.cfg["megatron_cfg"][ + "train_iters" + ], # Set by algorithm setup ), - optimizer_config=OptimizerConfig( + optimizer=OptimizerConfig( **self.cfg["megatron_cfg"]["optimizer"], ), - ddp_config=DistributedDataParallelConfig( + ddp=DistributedDataParallelConfig( check_for_nan_in_grad=True, grad_reduce_in_fp32=self.cfg["megatron_cfg"][ "distributed_data_parallel_config" @@ -553,9 +715,9 @@ def __init__( overlap_param_gather=self.cfg["megatron_cfg"][ "distributed_data_parallel_config" ]["overlap_param_gather"], - average_in_collective=self.cfg["megatron_cfg"][ - "distributed_data_parallel_config" - ]["average_in_collective"], + # we need to set 
average_in_collective=False with calculate_per_token_loss=True. + # otherwise, mcore throws an assertion error. + average_in_collective=False, use_distributed_optimizer=self.cfg["megatron_cfg"]["optimizer"][ "use_distributed_optimizer" ], @@ -563,11 +725,11 @@ def __init__( "distributed_data_parallel_config" ]["data_parallel_sharding_strategy"], ), - scheduler_config=SchedulerConfig( + scheduler=SchedulerConfig( **self.cfg["megatron_cfg"]["scheduler"], ), - dataset_config=None, - tokenizer_config=TokenizerConfig( + dataset=None, + tokenizer=TokenizerConfig( tokenizer_type="HuggingFaceTokenizer", tokenizer_model=hf_model_name, ), @@ -585,8 +747,8 @@ def __init__( # Set the param sync function for the model if ( - self.megatron_cfg.ddp_config.overlap_param_gather - and self.megatron_cfg.ddp_config.align_param_gather + self.megatron_cfg.ddp.overlap_param_gather + and self.megatron_cfg.ddp.align_param_gather ): self.megatron_cfg.param_sync_func = [ model_chunk.start_param_sync for model_chunk in self.model @@ -598,31 +760,39 @@ def __init__( if init_reference_model: self.model = self.move_model(self.model, "cpu") - ref_ckpt_context = _init_checkpointing_context(ref_checkpoint_config) + ref_ckpt_context = init_checkpointing_context(ref_checkpoint_config) # Create a separate megatron config for the reference model with the correct checkpoint config ref_megatron_cfg = ConfigContainer( - model_config=self.megatron_cfg.model_config, - checkpoint_config=ref_checkpoint_config, # Use the reference checkpoint config - logger_config=self.megatron_cfg.logger_config, - train_config=self.megatron_cfg.train_config, - optimizer_config=self.megatron_cfg.optimizer_config, - ddp_config=self.megatron_cfg.ddp_config, - scheduler_config=self.megatron_cfg.scheduler_config, - dataset_config=self.megatron_cfg.dataset_config, - tokenizer_config=self.megatron_cfg.tokenizer_config, + model=self.megatron_cfg.model, + checkpoint=ref_checkpoint_config, # Use the reference checkpoint config + 
logger=self.megatron_cfg.logger, + train=self.megatron_cfg.train, + optimizer=self.megatron_cfg.optimizer, + ddp=self.megatron_cfg.ddp, + scheduler=self.megatron_cfg.scheduler, + dataset=self.megatron_cfg.dataset, + tokenizer=self.megatron_cfg.tokenizer, ) # Create a separate state object for the reference model ref_state = GlobalState() ref_state.cfg = ref_megatron_cfg - reference_model = get_model_from_config( - self.megatron_cfg.model_config, - self.megatron_cfg.ddp_config, - use_torch_fsdp2=self.megatron_cfg.dist_config.use_torch_fsdp2, - overlap_param_gather_with_optimizer_step=self.megatron_cfg.optimizer_config.overlap_param_gather_with_optimizer_step, - data_parallel_random_init=self.megatron_cfg.rng_config.data_parallel_random_init, + # Configure mixed precision wrapper for reference model + ref_mixed_precision_wrapper = Float16Module + if self.cfg["megatron_cfg"].get("freeze_moe_router", False): + ref_mixed_precision_wrapper = CustomFloat16Module + if self.cfg["megatron_cfg"].get("defer_fp32_logits", None): + ref_mixed_precision_wrapper = None + + reference_model = get_model( + self.megatron_cfg.model, + self.megatron_cfg.ddp, + use_torch_fsdp2=self.megatron_cfg.dist.use_torch_fsdp2, + overlap_param_gather_with_optimizer_step=self.megatron_cfg.optimizer.overlap_param_gather_with_optimizer_step, + pre_wrap_hook=self.megatron_cfg.rng.data_parallel_random_init, + mixed_precision_wrapper=ref_mixed_precision_wrapper, ) print("Loading the Reference Model") if ( @@ -636,7 +806,7 @@ def __init__( None, # no scheduler checkpointing_context=ref_ckpt_context, skip_load_to_model_and_opt=HAVE_FSDP2 - and self.megatron_cfg.dist_config.use_torch_fsdp2, + and self.megatron_cfg.dist.use_torch_fsdp2, ) reference_model = reference_model[0] reference_model.eval() @@ -658,14 +828,12 @@ def __init__( _update_model_config_funcs( [self.model], - self.megatron_cfg.model_config, - self.megatron_cfg.ddp_config, + self.megatron_cfg.model, + self.megatron_cfg.ddp, self.optimizer, - 
align_grad_reduce=self.megatron_cfg.dist_config.align_grad_reduce, + align_grad_reduce=self.megatron_cfg.dist.align_grad_reduce, ) - from nemo.tron.tokenizers.tokenizer import build_tokenizer - tokenizer_config = TokenizerConfig( tokenizer_type="HuggingFaceTokenizer", tokenizer_model=hf_model_name, @@ -673,15 +841,22 @@ def __init__( self.megatron_tokenizer = build_tokenizer( tokenizer_config, - make_vocab_size_divisible_by=self.megatron_cfg.model_config.make_vocab_size_divisible_by + make_vocab_size_divisible_by=self.megatron_cfg.model.make_vocab_size_divisible_by // self.cfg["megatron_cfg"]["tensor_model_parallel_size"], tensor_model_parallel_size=self.cfg["megatron_cfg"][ "tensor_model_parallel_size" ], + trust_remote_code=True, + ) + self.final_padded_vocab_size = calculate_padded_vocab_size( + self.megatron_cfg.model.vocab_size, + self.megatron_cfg.model.make_vocab_size_divisible_by, + self.cfg["megatron_cfg"]["tensor_model_parallel_size"], ) - self.final_padded_vocab_size = tokenizer_config.padded_vocab_size self.dp_size = worker_sharding_annotations.get_axis_size("data_parallel") - self.megatron_to_hf_converter = MegatronToHFConverter(hf_model_name, self.model) + self.megatron_bridge = AutoBridge.from_hf_pretrained( + hf_model_name, trust_remote_code=True + ) self.should_disable_forward_pre_hook = ( self.cfg["megatron_cfg"]["optimizer"]["use_distributed_optimizer"] @@ -692,11 +867,34 @@ def __init__( # vars used for refit ## will be initialized in prepare_refit_info - self.refit_param_info_hf = None - self.local_key_to_global_keys = None + # refit_param_info_mcore combines the conversion tasks with the param memory + # [(mcore_param_name, estimated_memory), ...] + # Note: here param name is local param name, with local layer number and + # local expert id etc. 
+ self.refit_conversion_tasks = ( + None # Meta data for conversion params from megatron bridge + ) + self.refit_conversion_tasks_current_index = None + self.refit_param_info_mcore = None + ## used for streaming update inference engine weights self._held_gather_buffer = None + def init_collective( + self, ip: str, port: int, world_size: int, *, train_world_size: int + ) -> None: + """Initialize the collective communication.""" + from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator + from vllm.distributed.utils import StatelessProcessGroup + + # world_size = train_world_size + inference_world_size + # variable train_world_size is used in inference cluster + pg = StatelessProcessGroup.create( + host=ip, port=port, rank=self.rank, world_size=world_size + ) + device = torch.cuda.current_device() + self.model_update_group = PyNcclCommunicator(pg, device=device) + def is_alive(self): return True @@ -715,6 +913,7 @@ def disable_forward_pre_hook(self, param_sync=True): assert isinstance(self.model, DistributedDataParallel) self.model.disable_forward_pre_hook(param_sync=param_sync) + @wrap_with_nvtx_name("megatron_policy_worker/train") def train( self, data: BatchedDataDict, @@ -766,7 +965,9 @@ def train( f"Dim 1 must be the sequence dim, expected dim 1={seq_dim_size} but got shape {v.shape}" ) - forward_step = partial(forward_step_arbitrary_loss, loss_fn=loss_fn) + forward_step = partial( + forward_step_arbitrary_loss, loss_fn=loss_fn, policy_cfg=self.cfg + ) all_mb_metrics = [] losses = [] for gb_idx in range(num_global_batches): @@ -825,6 +1026,9 @@ def train( tp_size = self.cfg["megatron_cfg"]["tensor_model_parallel_size"] cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] pad_factor = cp_size * 2 * tp_size if cp_size > 1 else tp_size + if self.fp8_cfg is not None and self.fp8_cfg.get("enabled", False): + # if fp8 is enabled, ensure the sequence is padded to multiples of 16 + pad_factor = math.lcm(16, pad_factor) if 
self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] > 1: _, pad_full_seq_to = ( batch.get_microbatch_iterator_for_packable_sequences_len() @@ -867,7 +1071,12 @@ def train( torch.cuda.empty_cache() # Update parameters. - update_successful, grad_norm, num_zeros_in_grad = self.optimizer.step() + if not eval_mode: + update_successful, grad_norm, num_zeros_in_grad = ( + self.optimizer.step() + ) + else: + update_successful, grad_norm, num_zeros_in_grad = (True, 0.0, 0.0) # when freezing sub-models we may have a mixture of successful and unsucessful ranks, # so we must gather across mp ranks @@ -905,7 +1114,6 @@ def train( curr_wd = self.scheduler.get_wd() loss_metrics["lr"] = curr_lr loss_metrics["wd"] = curr_wd - loss_metrics["grad_norm"] = grad_norm loss_metrics["global_valid_seqs"] = global_valid_seqs.item() loss_metrics["global_valid_toks"] = global_valid_toks.item() mb_losses.append(loss_metrics["loss"]) @@ -951,13 +1159,14 @@ def train( metrics = { "global_loss": global_loss.cpu(), "rank": torch.distributed.get_rank(), + "gpu_name": torch.cuda.get_device_name(), + "model_dtype": self.dtype, "all_mb_metrics": dict(mb_metrics), - "grad_norm": torch.tensor( - mb_metrics["grad_norm"][-1] - ).cpu(), # TODO @sahilj: return an average or something later + "grad_norm": torch.tensor([grad_norm]), } return metrics + @wrap_with_nvtx_name("megatron_policy_worker/get_logprobs") def get_logprobs( self, *, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[LogprobOutputSpec]: @@ -1022,6 +1231,9 @@ def forward_step_fn( cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] cp_rank = get_context_parallel_rank() pad_factor = cp_size * 2 * tp_size if cp_size > 1 else tp_size + if self.fp8_cfg is not None and self.fp8_cfg.get("enabled", False): + # if fp8 is enabled, ensure the sequence is padded to multiples of 16 + pad_factor = math.lcm(16, pad_factor) ( input_ids, input_ids_cp_sharded, @@ -1042,22 +1254,46 @@ def forward_step_fn( 
input_ids = data_dict["input_ids"] input_ids_cp_sharded = input_ids attention_mask, _, position_ids = get_ltor_masks_and_position_ids( - input_ids, 0, False, False, False + data=input_ids, + eod_token=0, # used for loss_mask, which we don't use + pad_token=0, # used for loss_mask, which we don't use + reset_position_ids=False, + reset_attention_mask=False, + eod_mask_loss=False, + pad_mask_loss=False, ) packed_seq_params = None unpacked_input_ids = input_ids + multimodal_data = data_dict.get_multimodal_dict( + as_tensors=True, device=input_ids.device + ) + if len(multimodal_data) > 0: + position_ids = None + + additional_kwargs = {} + # Mamba models currently do not support packed_seq_params + if packed_seq_params is not None: + additional_kwargs["packed_seq_params"] = packed_seq_params + output_tensor = model( - input_ids_cp_sharded, - position_ids, - attention_mask, - packed_seq_params=packed_seq_params, + input_ids=input_ids_cp_sharded, + position_ids=position_ids, + attention_mask=attention_mask, + **multimodal_data, + **additional_kwargs, ) + # Apply temperature scaling to logits for training + # This matches the dtensor worker's _apply_temperature_scaling in the train method + if "generation" in self.cfg and self.cfg["generation"] is not None: + output_tensor.div_(self.cfg["generation"]["temperature"]) + def collection_fn(output_tensor): stc = time.time() tp_grp = get_tensor_model_parallel_group() tp_rank = get_tensor_model_parallel_rank() + logprob_chunk_size = self.cfg.get("logprob_chunk_size", None) if self.cfg["sequence_packing"]["enabled"]: token_logprobs = from_parallel_logits_to_logprobs_packed_sequences( output_tensor, @@ -1069,15 +1305,17 @@ def collection_fn(output_tensor): group=tp_grp, inference_only=True, cp_group=get_context_parallel_group(), + chunk_size=logprob_chunk_size, ) else: token_logprobs = from_parallel_logits_to_logprobs( - output_tensor.to(torch.float32), + output_tensor, target=unpacked_input_ids, vocab_start_index=tp_rank * 
output_tensor.shape[-1], vocab_end_index=(tp_rank + 1) * output_tensor.shape[-1], tp_group=tp_grp, inference_only=True, + chunk_size=logprob_chunk_size, ) # Prepend 0 logprob for first token to maintain same sequence length as input @@ -1165,8 +1403,9 @@ def use_reference_model(self): # if isinstance(item, torch.Tensor): # self.model.state_dict()[name] = item.detach().to(device="cuda", non_blocking=True, copy=True) - gc.collect() - torch.cuda.empty_cache() + if self.cfg["megatron_cfg"]["empty_unused_memory_level"] >= 1: + gc.collect() + torch.cuda.empty_cache() # - self.model is the original reference_model, now on CUDA # - self.reference_model is the original model, now on CPU @@ -1180,14 +1419,16 @@ def use_reference_model(self): # item = item.detach().to(device="cuda", non_blocking=True, copy=True) # self.model.state_dict()[name] = item - gc.collect() - torch.cuda.empty_cache() + if self.cfg["megatron_cfg"]["empty_unused_memory_level"] >= 1: + gc.collect() + torch.cuda.empty_cache() ## re-enable overlap param gather after weight swap if self.should_disable_forward_pre_hook: self.enable_forward_pre_hook() # Temporary fix, 'data' is a kwarg due to some sort of ray bug + @wrap_with_nvtx_name("megatron_policy_worker/get_reference_policy_logprobs") def get_reference_policy_logprobs( self, *, data: BatchedDataDict[Any], micro_batch_size: Optional[int] = None ) -> BatchedDataDict[ReferenceLogprobOutputSpec]: @@ -1210,6 +1451,304 @@ def get_reference_policy_logprobs( return_data["reference_logprobs"] = reference_logprobs["logprobs"].cpu() return return_data + @wrap_with_nvtx_name("megatron_policy_worker/get_topk_logits") + def get_topk_logits( + self, + *, + data: BatchedDataDict[GenerationDatumSpec], + k: int, + micro_batch_size: Optional[int] = None, + ): + """Get the top-k logits and indices for a batch of data. + + The major difference from get_logprobs is that we compute top-k logits and indices for each position in the sequence. 
+ + Returns: + BatchedDataDict containing: + - topk_logits: Tensor of top-k logits for each position in the sequence + - topk_indices: Tensor of top-k indices for each position in the sequence + """ + no_grad = torch.no_grad() + no_grad.__enter__() + + logprob_batch_size = ( + micro_batch_size + if micro_batch_size is not None + else self.cfg["logprob_batch_size"] + ) + + sequence_dim = 1 + input_seq_dim_size = data["input_ids"].shape[sequence_dim] + # Avoid shadowing the function argument `k` by using a distinct variable name + for tensor_name, v in data.items(): + if torch.is_tensor(v) and len(v.shape) > 1: + assert v.shape[sequence_dim] == input_seq_dim_size, ( + f"Tensor {tensor_name} must have sequence dimension {sequence_dim} of size {input_seq_dim_size}, but got shape {v.shape}" + ) + + self.model.eval() + + pp_seq_dim_size = input_seq_dim_size + pp_grp = get_pipeline_model_parallel_group() + + # If using sequence packing with PP>1, pad full sequence to static PP buffer length + pad_full_seq_to = None + if ( + self.cfg["sequence_packing"]["enabled"] + and self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] > 1 + ): + _, pad_full_seq_to = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + pp_seq_dim_size = pad_full_seq_to + + def forward_step_fn( + data_iterator: Iterator[BatchedDataDict[Any]], model: GPTModel + ): + nonlocal pad_full_seq_to + data_dict = next(data_iterator).to("cuda") + + pack = self.cfg["sequence_packing"]["enabled"] + if pack: + original_seq_length = data_dict["input_ids"].shape[1] + tp_size = self.cfg["megatron_cfg"]["tensor_model_parallel_size"] + pp_size = self.cfg["megatron_cfg"]["pipeline_model_parallel_size"] + cp_size = self.cfg["megatron_cfg"]["context_parallel_size"] + cp_rank = get_context_parallel_rank() + pad_factor = cp_size * 2 * tp_size if cp_size > 1 else tp_size + if self.fp8_cfg is not None and self.fp8_cfg.get("enabled", False): + pad_factor = math.lcm(16, pad_factor) + + ( + input_ids_unpacked, + 
input_ids_cp_sharded, + packed_seq_params, + cu_seqlens, + cu_seqlens_padded, + ) = _pack_sequences_for_megatron( + data_dict["input_ids"].clone(), + data_dict["input_lengths"], + pad_individual_seqs_to_multiple_of=pad_factor, + pad_packed_seq_to=pad_full_seq_to, + cp_rank=cp_rank, + cp_size=cp_size, + ) + attention_mask, position_ids = None, None + seq_lengths = data_dict["input_lengths"] + unpacked_seqlen = original_seq_length + else: + input_ids_cp_sharded = data_dict["input_ids"] + attention_mask, _, position_ids = get_ltor_masks_and_position_ids( + data=input_ids_cp_sharded, + eod_token=0, + pad_token=0, + reset_position_ids=False, + reset_attention_mask=False, + eod_mask_loss=False, + pad_mask_loss=False, + ) + packed_seq_params = None + + multimodal_data = data_dict.get_multimodal_dict( + as_tensors=True, device=input_ids_cp_sharded.device + ) + if len(multimodal_data) > 0: + position_ids = None + + additional_kwargs = {} + if packed_seq_params is not None: + additional_kwargs["packed_seq_params"] = packed_seq_params + + output_tensor = model( + input_ids=input_ids_cp_sharded, + position_ids=position_ids, + attention_mask=attention_mask, + **additional_kwargs, + **multimodal_data, + ) + + if "generation" in self.cfg and self.cfg["generation"] is not None: + output_tensor.div_(self.cfg["generation"]["temperature"]) + + def collection_fn(_): + # Only the last PP stage produces final logits/top-k; earlier stages return empty + # if not is_pipeline_last_stage(ignore_virtual=True): + # return output_tensor.new_zeros(()), {} + + tp_grp = get_tensor_model_parallel_group() + tp_rank = get_tensor_model_parallel_rank() + vocab_shard_size = output_tensor.shape[-1] + vocab_start_index = tp_rank * vocab_shard_size + + chunk_size = None + if "logprob_chunk_size" in self.cfg: + chunk_size = self.cfg["logprob_chunk_size"] + + topk_vals_local, topk_idx_local = distributed_vocab_topk( + output_tensor, + k, + tp_grp, + vocab_start_index=vocab_start_index, + 
vocab_end_index=vocab_start_index + vocab_shard_size, + chunk_size=chunk_size, + ) + + if self.cfg["megatron_cfg"]["context_parallel_size"] > 1: + cp_grp = get_context_parallel_group() + if pack: + # Per-sequence CP allgather following packed-sequence logic + batch_size = data_dict["input_ids"].shape[0] + total_packed_len = int(cu_seqlens_padded[-1].item()) + + topk_vals_full = torch.zeros( + (1, total_packed_len, k), + dtype=topk_vals_local.dtype, + device=topk_vals_local.device, + ) + topk_idx_full = torch.zeros( + (1, total_packed_len, k), + dtype=topk_idx_local.dtype, + device=topk_idx_local.device, + ) + + for i in range(batch_size): + start_idx = int(cu_seqlens_padded[i].item()) + end_idx = int(cu_seqlens_padded[i + 1].item()) + if end_idx > start_idx: + local_vals_slice = topk_vals_local[ + :, start_idx // cp_size : end_idx // cp_size, : + ] + local_idx_slice = topk_idx_local[ + :, start_idx // cp_size : end_idx // cp_size, : + ] + gathered_vals = allgather_cp_sharded_tensor( + local_vals_slice, cp_grp, seq_dim=1 + ) + gathered_idx = allgather_cp_sharded_tensor( + local_idx_slice, cp_grp, seq_dim=1 + ) + # Some kernels may return [X, Y, k] where X*Y = (end_idx - start_idx). + # Flatten leading dims and reshape to [1, expected_len, k] to match target. + expected_len = end_idx - start_idx + if ( + gathered_vals.dim() == 3 + and gathered_vals.shape[1] != expected_len + ): + gathered_vals = gathered_vals.reshape( + 1, expected_len, gathered_vals.shape[-1] + ) + if ( + gathered_idx.dim() == 3 + and gathered_idx.shape[1] != expected_len + ): + gathered_idx = gathered_idx.reshape( + 1, expected_len, gathered_idx.shape[-1] + ) + topk_vals_full[:, start_idx:end_idx, :] = gathered_vals + topk_idx_full[:, start_idx:end_idx, :] = gathered_idx + else: + # Sequence packing must be enabled when CP > 1 + raise RuntimeError( + "Context Parallelism (CP>1) requires sequence packing to be enabled." 
+ ) + else: + topk_vals_full = topk_vals_local + topk_idx_full = topk_idx_local + + if pack: + batch_size = data_dict["input_ids"].shape[0] + out_vals = torch.zeros( + (batch_size, unpacked_seqlen, k), + dtype=topk_vals_full.dtype, + device=topk_vals_full.device, + ) + out_idx = torch.zeros( + (batch_size, unpacked_seqlen, k), + dtype=topk_idx_full.dtype, + device=topk_idx_full.device, + ) + for i in range(batch_size): + seq_len = int(seq_lengths[i].item()) + start_idx = int(cu_seqlens_padded[i].item()) + if seq_len > 0: + out_vals[i, :seq_len, :] = topk_vals_full[ + 0, start_idx : start_idx + seq_len, : + ] + out_idx[i, :seq_len, :] = topk_idx_full[ + 0, start_idx : start_idx + seq_len, : + ] + return output_tensor.new_zeros(()), { + "topk_logits": out_vals, + "topk_indices": out_idx, + } + else: + return output_tensor.new_zeros(()), { + "topk_logits": topk_vals_full, + "topk_indices": topk_idx_full, + } + + return output_tensor, collection_fn + + if self.cfg["dynamic_batching"]["enabled"]: + mb_iterator = data.make_microbatch_iterator_with_dynamic_shapes() + data_iterator_len = data.get_microbatch_iterator_dynamic_shapes_len() + micro_batch = logprob_batch_size + elif self.cfg["sequence_packing"]["enabled"]: + mb_iterator = data.make_microbatch_iterator_for_packable_sequences() + data_iterator_len, _ = ( + data.get_microbatch_iterator_for_packable_sequences_len() + ) + micro_batch = 1 + else: + mb_iterator = data.make_microbatch_iterator(logprob_batch_size) + data_iterator_len = max(1, data.size // logprob_batch_size) + micro_batch = logprob_batch_size + + forward_backward_func = get_forward_backward_func() + list_of_outputs = forward_backward_func( + forward_step_func=forward_step_fn, + data_iterator=mb_iterator, + model=self.model, + num_microbatches=data_iterator_len, + seq_length=pp_seq_dim_size, + micro_batch_size=micro_batch, + decoder_seq_length=pp_seq_dim_size, + forward_only=True, + ) + + if is_pipeline_last_stage(ignore_virtual=True): + logits_chunks = 
[] + indices_chunks = [] + for out in list_of_outputs: + tk = out["topk_logits"] + ti = out["topk_indices"] + pad_len = input_seq_dim_size - tk.shape[1] + if pad_len > 0: + tk = torch.nn.functional.pad(tk, (0, 0, 0, pad_len), value=0.0) + ti = torch.nn.functional.pad(ti, (0, 0, 0, pad_len), value=0) + logits_chunks.append(tk) + indices_chunks.append(ti) + + topk_logits = torch.cat(logits_chunks, dim=0) + topk_indices = torch.cat(indices_chunks, dim=0) + + topk_logits = broadcast_tensor( + topk_logits, torch.distributed.get_rank(), pp_grp + ) + topk_indices = broadcast_tensor( + topk_indices, torch.distributed.get_rank(), pp_grp + ) + else: + last_pp_rank = get_pipeline_model_parallel_last_rank() + topk_logits = broadcast_tensor(None, last_pp_rank, pp_grp) + topk_indices = broadcast_tensor(None, last_pp_rank, pp_grp) + + no_grad.__exit__(None, None, None) + return BatchedDataDict.from_batches( + [{"topk_logits": topk_logits.cpu(), "topk_indices": topk_indices.cpu()}] + ) + + @wrap_with_nvtx_name("megatron_policy_worker/generate") def generate( self, *, data: BatchedDataDict[GenerationDatumSpec], greedy: bool = False ) -> BatchedDataDict[GenerationOutputSpec]: @@ -1241,7 +1780,7 @@ def generate( f"Input to Megatron Generation worker is not properly right-padded: {error_msg}" ) - model_cfg = self.megatron_cfg.model_config + model_cfg = self.megatron_cfg.model inference_wrapper_config = InferenceWrapperConfig( hidden_size=model_cfg.hidden_size, inference_batch_times_seqlen_threshold=1000000, @@ -1352,177 +1891,162 @@ def report_device_id(self) -> str: # Get device UUID using NVML return get_device_uuid(device_idx) + def get_zmq_address(self): + """Get the ZMQ address for the current device.""" + return f"ipc:///tmp/{self.report_device_id()}.sock" + + def maybe_init_zmq(self): + """Initialize the ZMQ socket if it doesn't exist.""" + if not hasattr(self, "zmq_socket"): + self.zmq_context = zmq.Context() + self.zmq_socket = self.zmq_context.socket(zmq.REQ) + 
self.zmq_socket.setsockopt( + zmq.SNDTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt( + zmq.RCVTIMEO, 120000 + ) # set timeout to 120 seconds + self.zmq_socket.setsockopt(zmq.LINGER, 0) + self.zmq_socket.bind(self.get_zmq_address()) + @torch.no_grad() + @wrap_with_nvtx_name("megatron_policy_worker/prepare_refit_info") def prepare_refit_info(self) -> None: - # Get parameter info for refit - ## param_info: list of ((name, shape, dtype), size_in_bytes) tuples - # Cannot cache refit_param_info_mcore since dtype and size_in_bytes for the 1st and 2nd steps may be different - ## e.g. e_score_correction_bias - refit_param_info_mcore = get_param_info(self.model, self.dtype) - - # Create a map that maps any local parameter name to a list of global parameter names. - # This map is repeatedly used by parameter gatherring phase during refit of every step. - self.local_key_to_global_keys = get_local_key_to_global_keys( - self.model, state_dict_info=refit_param_info_mcore + """Prepare state dict metadata for weight refitting and IPC streaming.""" + self.refit_param_info_mcore = self._calculate_refit_param_info() + + # Collect tensor metadata for refit / hf side info + refit_param_info_hf = {} + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + conversion_tasks=self.refit_conversion_tasks, # used for metadata caching ) + for name, tensor in hf_params_generator: + metadata = (tensor.shape, tensor.dtype) + refit_param_info_hf[name] = metadata + return refit_param_info_hf + + def _calculate_refit_param_info(self) -> list[tuple[str, int]]: + """Calculate parameter information for refit. 
+ + Each task contains: + - param_name: Local parameter name without module prefixes + - mapping: MegatronParamMapping instance for weight transformation + - pp_rank: Pipeline-parallel rank owning the parameter + - vp_stage: Virtual-pipeline stage index + - megatron_module: Reference to Megatron model/submodule + - param_weight: Target parameter tensor for converted weight - # Collect tensor metadata for refit - self.refit_param_info_hf = {} - for key, _ in refit_param_info_mcore: - # gather megatron params - gathered_megatron_params = gather_params( - self.model, - [key], - key_to_global_keys=self.local_key_to_global_keys, - ) - # convert to hf params - gathered_hf_params = self.megatron_to_hf_converter.convert( - gathered_megatron_params, self.model.config - ) - # collect tensor metadata - for name, tensor in gathered_hf_params.items(): - self.refit_param_info_hf[name] = ( - tensor.shape, - tensor.dtype, - tensor.numel(), + Returns: + List of (parameter_name, size_in_bytes) tuples. + """ + self.refit_conversion_tasks = self.megatron_bridge.get_conversion_tasks( + [self.model] + ) + param_info = [] + + def calculate_size_in_bytes(param, tp_size, ep_size): + if param is None: + # need to broadcast for other pp ranks + size_in_bytes = None + else: + # Calculate size for this parameter + prec_to_bytes = { + torch.bfloat16: 2, + torch.float16: 2, + torch.float32: 4, + } + scale = prec_to_bytes[self.dtype] / prec_to_bytes[param.dtype] + size_in_bytes = ( + param.element_size() * param.numel() * tp_size * ep_size * scale ) - return self.refit_param_info_hf + # Broadcast size_in_bytes across pipeline parallel ranks + return broadcast_object_across_pp_ranks(size_in_bytes) - @torch.no_grad() - def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: - """Prepare Megatron model weights for IPC transfer to vLLM. 
+ for task in self.refit_conversion_tasks: + param_info.append( + ( + task.param_name, + calculate_size_in_bytes( + task.param_weight, + task.mapping.tp_size, + task.mapping.ep_size if task.mapping.is_expert else 1, + ), + ) + ) + return param_info - Collects information about weight tensors (names and sizes). - Returns a list of (parameter_name, size_in_bytes) tuples. - """ + def get_free_memory_bytes(self) -> int: + """Get the available free memory.""" from nemo_rl.utils.nvml import get_free_memory_bytes - # Get parameter info for refit - ## param_info: list of ((name, shape, dtype), size_in_bytes) tuples - # Cannot cache refit_param_info_mcore since dtype and size_in_bytes for the 1st and 2nd steps may be different - ## e.g. e_score_correction_bias - refit_param_info_mcore = get_param_info(self.model, self.dtype) - - # Collect current available memory for refit - ## Get current device index from torch device_idx = torch.cuda.current_device() - ## Get device free memory using NVML - total_available_bytes = get_free_memory_bytes(device_idx) - ## default to 20% to get some more speedup than 10%, OOM if set to 30% - memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.2") - total_available_bytes *= float(memory_ratio) - - return refit_param_info_mcore, total_available_bytes + return get_free_memory_bytes(device_idx) - # Temporary fix, 'keys' is a kwarg due to some sort of ray bug @torch.no_grad() - def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: - """Get IPC handles for the requested Megatron model weights. 
+ @wrap_with_nvtx_name("megatron_policy_worker/stream_weights_via_ipc_zmq") + def stream_weights_via_ipc_zmq(self, buffer_size_bytes: int = 0) -> None: + """Stream model weights to peer process via ZMQ IPC socket.""" + self.maybe_init_zmq() - Args: - keys: List of parameter names to get handles for - Returns: - Dict mapping device UUID to list of (mapped_key, handle) tuples - """ - if self._held_gather_buffer is not None: - del self._held_gather_buffer - self._held_gather_buffer = None + from nemo_rl.models.policy.utils import stream_weights_via_ipc_zmq_impl - gathered_megatron_params = gather_params( - self.model, - keys, - key_to_global_keys=self.local_key_to_global_keys, + # Generate HF parameters for streaming + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + conversion_tasks=self.refit_conversion_tasks, # used for metadata caching ) - gathered_hf_params = self.megatron_to_hf_converter.convert( - gathered_megatron_params, self.model.config + # Use the shared implementation + stream_weights_via_ipc_zmq_impl( + params_generator=hf_params_generator, + buffer_size_bytes=buffer_size_bytes, + zmq_socket=self.zmq_socket, + rank=self.rank, + worker_name=str(self), ) - # Get device UUID for IPC handles - device_uuid = self.report_device_id() - from torch.multiprocessing.reductions import reduce_tensor - - # Create IPC handles for each parameter - tensor_number_threshold = os.getenv( - "NEMO_RL_MEGATRON_IPC_TENSOR_PACKING_THRESHOLD", "32" - ) # an arbitrary threshold - if len(gathered_hf_params) >= int(tensor_number_threshold): - pack_tensor_for_ipc = True - else: - pack_tensor_for_ipc = False - - if pack_tensor_for_ipc: - # Pack tensors in gathered_hf_params into consolidated tensors by dtype - # First calculate total size needed for each dtype - type_to_total_size = defaultdict(lambda: 0) - tensor_metadata = dict() - - # Record offset of the tensor - for key, tensor in gathered_hf_params.items(): - # dtype for the 
1st and 2nd steps may be different (e.g. e_score_correction_bias) - if tensor.dtype == self.refit_param_info_hf[key][1]: - tensor_metadata[key] = type_to_total_size[tensor.dtype] - else: - # also send dtype if it changes - tensor_metadata[key] = ( - type_to_total_size[tensor.dtype], - tensor.dtype, - ) - # update record - self.refit_param_info_hf[key] = ( - tensor.shape, - tensor.dtype, - tensor.numel(), - ) - type_to_total_size[tensor.dtype] += tensor.numel() - - # Allocate consolidated tensors for each dtype - packed_tensors = { - dtype: torch.empty( - total_size, - device=next(iter(gathered_hf_params.values())).device, - dtype=dtype, - requires_grad=False, - ) - for dtype, total_size in type_to_total_size.items() - } - - # Copy tensors into consolidated buffers - for key, tensor in gathered_hf_params.items(): - offset = tensor_metadata[key] - if isinstance(offset, tuple): - offset, _ = offset - dtype = tensor.dtype - size = tensor.numel() - packed_tensors[dtype][offset : offset + size].copy_( - tensor.detach().view(-1) - ) - - # Create IPC handles for consolidated tensors - all_handles = [ - (dtype, reduce_tensor(tensor.detach())) - for dtype, tensor in packed_tensors.items() - ] - - # Store reference to prevent garbage collection - self._held_gather_buffer = packed_tensors - - serialized = (pack_tensor_for_ipc, all_handles, tensor_metadata) - else: - all_handles = [] - for key, tensor in gathered_hf_params.items(): - handle = reduce_tensor(tensor.detach()) - all_handles.append((key, handle)) - self._held_gather_buffer = gathered_hf_params - serialized = (False, all_handles) + @torch.no_grad() + def broadcast_weights_for_collective(self) -> None: + """Broadcast the weights for collective communication.""" + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + conversion_tasks=self.refit_conversion_tasks, # used for metadata caching + ) - return {device_uuid: serialized} + # param_iterator will return (name, 
tensor), we only need tensor + packed_broadcast_producer( + iterator=hf_params_generator, + group=self.model_update_group, + src=0, + post_iter_func=lambda x: x[1], + ) def prepare_for_lp_inference(self): self.model = self.move_model(self.model, "cuda", move_grads=False) self.model.eval() - self.offload_before_refit() + + # offload grads to cpu + self.model = self.move_model( + self.model, "cpu", move_params=False, move_grads=True + ) # get rid of grad buffers + + # offload optimizer to cpu + torch.randn(1).cuda() # wake up torch allocator + if ( + hasattr(self, "optimizer") + and self.optimizer is not None + and not self.optimizer_cpu_offload + and self.offload_optimizer_for_logprob + ): + self.move_optimizer("cpu") + + gc.collect() + torch.cuda.empty_cache() def prepare_for_training(self, *args, **kwargs): # onload models and optimizer state to cuda @@ -1532,18 +2056,19 @@ def prepare_for_training(self, *args, **kwargs): self.model.train() # Move optimizer state to CUDA if it exists - if hasattr(self, "optimizer") and self.optimizer is not None: - if isinstance(self.optimizer, ChainedOptimizer): - optimizer_state = self.optimizer.state - else: - optimizer_state = self.optimizer._get_state() - for _, state in optimizer_state.items(): - for k, v in state.items(): - if torch.is_tensor(v) and not v.is_cuda: - state[k] = v.to("cuda") + # colocated generation will always offload optimizer to cuda before refit + if ( + hasattr(self, "optimizer") + and self.optimizer is not None + and not self.optimizer_cpu_offload + and (self.offload_optimizer_for_logprob or self.is_generation_colocated) + ): + self.move_optimizer("cuda") - torch.cuda.empty_cache() + if self.cfg["megatron_cfg"]["empty_unused_memory_level"] >= 1: + torch.cuda.empty_cache() + @wrap_with_nvtx_name("megatron_policy_worker/offload_before_refit") def offload_before_refit(self): """Offload the optimizer and buffers to the CPU.""" no_grad = torch.no_grad() @@ -1557,19 +2082,12 @@ def offload_before_refit(self): 
self.model, "cpu", move_params=False, move_grads=True ) # get rid of grad buffers torch.randn(1).cuda() # wake up torch allocator - if hasattr(self, "optimizer") and self.optimizer is not None: - # Iterate through the state dictionaries for each parameter group - if isinstance(self.optimizer, ChainedOptimizer): - optimizer_state = self.optimizer.state - else: - optimizer_state = self.optimizer._get_state() - for _, state in optimizer_state.items(): - # Iterate through the state items (e.g., momentum, variance) for a parameter - for k, v in state.items(): - # Check if the item is a tensor and on the GPU - if torch.is_tensor(v) and v.is_cuda: - # Move the tensor to CPU and update the state dictionary - state[k] = v.to("cpu") + if ( + hasattr(self, "optimizer") + and self.optimizer is not None + and not self.optimizer_cpu_offload + ): + self.move_optimizer("cpu") gc.collect() torch.cuda.empty_cache() @@ -1582,22 +2100,16 @@ def offload_before_refit(self): ) no_grad.__exit__(None, None, None) + @wrap_with_nvtx_name("megatron_policy_worker/offload_after_refit") def offload_after_refit(self): + """Offload as much as possible on the CPU.""" no_grad = torch.no_grad() no_grad.__enter__() - # Offload as much as possible on the CPU self.model = self.move_model(self.model, "cpu") self.model.eval() torch.randn(1).cuda() # wake up torch allocator self.offload_before_refit() # rerun the old offload function - if self._held_gather_buffer is not None: - del self._held_gather_buffer - self._held_gather_buffer = None - - gc.collect() - torch.cuda.empty_cache() - allocated = torch.cuda.memory_allocated() / (1024**3) # Convert to GB reserved = torch.cuda.memory_reserved() / (1024**3) # Convert to GB print( @@ -1655,6 +2167,29 @@ def move_model( model.load_state_dict(new_state_dict) return model + def move_optimizer(self, device: str): + # Iterate through the state dictionaries for each parameter group + if isinstance(self.optimizer, ChainedOptimizer): + optimizer_state = 
self.optimizer.state + else: + optimizer_state = self.optimizer._get_state() + for _, state in optimizer_state.items(): + # Iterate through the state items (e.g., momentum, variance) for a parameter + for k, v in state.items(): + # Check if the item is a tensor + if torch.is_tensor(v): + # Move the tensor to device and update the state dictionary + if device == "cpu": + if v.is_cuda: + state[k] = v.to("cpu") + elif device == "cuda": + if not v.is_cuda: + state[k] = v.to("cuda") + else: + raise ValueError( + f"Invalid device: {device}. Only strings 'cpu' and 'cuda' are supported." + ) + def save_checkpoint( self, weights_path: str, @@ -1677,15 +2212,17 @@ def save_checkpoint( "Megatron core state or model is not initialized. Cannot save checkpoint." ) - original_save_path = self.mcore_state.cfg.checkpoint_config.save + original_save_path = self.mcore_state.cfg.checkpoint.save # save_dir = os.path.dirname(weights_path) release_name = os.path.basename(weights_path) try: maybe_finalize_async_save( - ckpt_cfg=self.mcore_state.cfg.checkpoint_config, blocking=False + self.mcore_state, + ckpt_cfg=self.mcore_state.cfg.checkpoint, + blocking=False, ) - self.mcore_state.cfg.checkpoint_config.save = weights_path + self.mcore_state.cfg.checkpoint.save = weights_path optimizer_to_save = None scheduler_to_save = None @@ -1703,6 +2240,8 @@ def save_checkpoint( if not is_training: self.model.eval() + if self.should_disable_forward_pre_hook: + self.disable_forward_pre_hook() save_checkpoint( state=self.mcore_state, model=[self.model], @@ -1713,10 +2252,13 @@ def save_checkpoint( ) print(f"Saved checkpoint to {weights_path}") maybe_finalize_async_save( - ckpt_cfg=self.mcore_state.cfg.checkpoint_config, + self.mcore_state, + ckpt_cfg=self.mcore_state.cfg.checkpoint, blocking=True, terminate=True, ) + if self.should_disable_forward_pre_hook: + self.enable_forward_pre_hook() if not is_training: # Restore training state if it was changed self.model.train() @@ -1725,7 +2267,7 @@ def 
save_checkpoint( print(f"Failed to save checkpoint to {weights_path}: {e}") raise finally: - self.mcore_state.cfg.checkpoint_config.save = original_save_path + self.mcore_state.cfg.checkpoint.save = original_save_path def load_checkpoint(self, weights_path: str, optimizer_path: Optional[str] = None): """Load a training checkpoint. @@ -1741,7 +2283,10 @@ def load_checkpoint(self, weights_path: str, optimizer_path: Optional[str] = Non def shutdown(self): """Shutdown the policy.""" - pass + # Clean up extension resources like ZMQ sockets + if hasattr(self, "zmq_socket"): + self.zmq_socket.close() + self.zmq_context.term() def start_gpu_profiling(self) -> None: """Start GPU profiling.""" @@ -1750,3 +2295,90 @@ def start_gpu_profiling(self) -> None: def stop_gpu_profiling(self) -> None: """Stop GPU profiling.""" torch.cuda.profiler.stop() + + def report_node_ip_and_gpu_id(self) -> list[tuple[str, int]]: + """Report the node IP and GPU ID of the current worker.""" + ip = ray._private.services.get_node_ip_address() + gpu_id = ray.get_gpu_ids()[0] + return (ip, gpu_id) + + def check_tensor_parallel_attributes(self) -> dict[str, Any]: + """Check tensor parallel attributes on model parameters. 
+ + Returns: + Dictionary containing information about tensor parallel parameters: + - tp_params: List of parameter names that have tensor_model_parallel=True + - non_tp_params: List of parameter names that have tensor_model_parallel=False + - total_params: Total number of parameters checked + - tp_size: Tensor parallel size from config + """ + tp_params = [] + non_tp_params = [] + total_params = 0 + + for name, param in self.model.named_parameters(): + total_params += 1 + tensor_model_parallel = getattr(param, "tensor_model_parallel", False) + + if tensor_model_parallel: + tp_params.append( + { + "name": name, + "tensor_model_parallel": tensor_model_parallel, + "partition_dim": getattr(param, "partition_dim", None), + "partition_stride": getattr(param, "partition_stride", None), + "shape": list(param.shape), + } + ) + else: + non_tp_params.append( + { + "name": name, + "tensor_model_parallel": tensor_model_parallel, + "shape": list(param.shape), + } + ) + + return { + "tp_params": tp_params, + "non_tp_params": non_tp_params, + "total_params": total_params, + "tp_size": self.megatron_cfg.model.tensor_model_parallel_size, + } + + +class CustomFloat16Module(Float16Module): + """Float 16 Module. + + Attributes: + config (TransformerConfig): Transformer config + fp16 (bool) : Specifies if the model runs in fp16 mode + bf16 (bool) : Specifies if the model runs in bf16 mode + + Args: + config (TransformerConfig): The transformer config used to initalize the model + """ + + def __init__(self, config: TransformerConfig, module: torch.nn.Module): + super(CustomFloat16Module, self).__init__(config, module) + self.re_enable_float32_expert_bias() + + def re_enable_float32_expert_bias(self) -> None: + """Ensure MoE router expert bias stays in float32 for numerical stability. + + Walks the wrapped module to find MoE routers and invokes the + `_maintain_float32_expert_bias()` helper which recreates or casts the + expert bias tensors to float32 as required by Megatron-LM. 
+ """ + module = self.module + # Handle VLM models where language model is nested + if hasattr(module, "language_model"): + module = module.language_model + if hasattr(module, "decoder") and hasattr(module.decoder, "layers"): + for layer in module.decoder.layers: + mlp = getattr(layer, "mlp", None) + router = getattr(mlp, "router", None) if mlp is not None else None + if router is not None and hasattr( + router, "_maintain_float32_expert_bias" + ): + router._maintain_float32_expert_bias() diff --git a/nemo_rl/models/policy/utils.py b/nemo_rl/models/policy/utils.py index a61e5e20b7..0d12c60ecd 100644 --- a/nemo_rl/models/policy/utils.py +++ b/nemo_rl/models/policy/utils.py @@ -12,15 +12,178 @@ # See the License for the specific language governing permissions and # limitations under the License. +import gc import importlib import os -from typing import Any +import traceback +from enum import Enum +from typing import Any, Dict, Optional import torch -from transformers import AutoConfig +import zmq +from torch.multiprocessing.reductions import rebuild_cuda_tensor +from transformers import ( + AutoConfig, + AutoModelForCausalLM, + AutoModelForImageTextToText, + AutoModelForTextToWaveform, +) + +# Try to import nemo_automodel classes, fallback to None if not available +try: + from nemo_automodel.components._transformers.auto_model import ( + NeMoAutoModelForCausalLM, + NeMoAutoModelForImageTextToText, + NeMoAutoModelForTextToWaveform, + ) + + NEMO_AUTOMODEL_AVAILABLE = True +except ImportError: + # nemo_automodel is not installed, classes will be None + NeMoAutoModelForCausalLM = None # type: ignore + NeMoAutoModelForImageTextToText = None # type: ignore + NeMoAutoModelForTextToWaveform = None # type: ignore + NEMO_AUTOMODEL_AVAILABLE = False from nemo_rl.distributed.worker_group_utils import get_nsight_config_if_pattern_matches +# an automodel factory for loading the huggingface models from correct class + +AUTOMODEL_FACTORY: Dict[str, Any] = { + "qwen2_5_vl": 
AutoModelForImageTextToText, + "qwen2_vl": AutoModelForImageTextToText, + "qwen2_5_omni": AutoModelForTextToWaveform, + "llava": AutoModelForImageTextToText, + "internvl": AutoModelForImageTextToText, + "gemma3": AutoModelForImageTextToText, + "smolvlm": AutoModelForImageTextToText, + "mistral3": AutoModelForImageTextToText, + "llama4": AutoModelForImageTextToText, +} + +if NEMO_AUTOMODEL_AVAILABLE: + AUTOMODEL_FACTORY = { + "qwen2_5_vl": NeMoAutoModelForImageTextToText, + "qwen2_vl": NeMoAutoModelForImageTextToText, + "qwen2_5_omni": NeMoAutoModelForTextToWaveform, + "llava": NeMoAutoModelForImageTextToText, + "internvl": NeMoAutoModelForImageTextToText, + "gemma3": NeMoAutoModelForImageTextToText, + "smolvlm": NeMoAutoModelForImageTextToText, + "mistral3": NeMoAutoModelForImageTextToText, + "llama4": NeMoAutoModelForImageTextToText, + } + + +class IPCProtocol(Enum): + """IPC protocol constants for ZMQ weight streaming.""" + + COMPLETE = "complete" + ACK = "ack" + + +def apply_top_k_top_p( + logits: torch.Tensor, + top_k: Optional[int] = None, + top_p: Optional[float] = None, +) -> torch.Tensor: + """Apply top-k and top-p masks to the logits. + + Simplified version of VLLM's implementation for scalar parameters. + + Based on VLLM's implementation: + https://github.com/vllm-project/vllm/blob/34a20c49b3f81f64133428b3a0d62309db1256f9/vllm/v1/sample/ops/topk_topp_sampler.py + SPDX-License-Identifier: Apache-2.0 + Copyright contributors to the vLLM project + + Args: + logits: Input logits tensor of shape [batch_size, seq_len, vocab_size] + top_k: Top-k sampling parameter. Set to -1 to consider all tokens. + top_p: Top-p (nucleus) sampling parameter. Must be in (0, 1]. Set to 1 to consider all tokens. 
+ + Returns: + Filtered logits with sampling parameters applied + """ + if top_p is None or top_p == 1.0: + if top_k is None or top_k == -1: + return logits + # Avoid sorting vocab for top-k only case + return apply_top_k_only(logits, top_k) + + # Apply top-p (requires sorting) + logits_sort, logits_idx = logits.sort(dim=-1, descending=False) + + if top_k is not None and top_k != -1: + # Apply top-k first + top_k_index = logits_sort.size(-1) - top_k + # Get all the top_k values - need to broadcast the index across all dimensions + index_tensor = torch.full( + logits_sort.shape[:-1], + top_k_index, + device=logits_sort.device, + dtype=torch.long, + ) + top_k_threshold = logits_sort.gather(-1, index_tensor.unsqueeze(-1)) + top_k_mask = logits_sort < top_k_threshold + logits_sort.masked_fill_(top_k_mask, -float("inf")) + + # Apply top-p + probs_sort = logits_sort.softmax(dim=-1) + probs_sum = torch.cumsum(probs_sort, dim=-1) + top_p_mask = probs_sum <= 1 - top_p + # at least one + top_p_mask[..., -1] = False + logits_sort.masked_fill_(top_p_mask, -float("inf")) + + # Re-sort the probabilities + logits = logits_sort.scatter(dim=-1, index=logits_idx, src=logits_sort) + return logits + + +def apply_top_k_only( + logits: torch.Tensor, + top_k: int, +) -> torch.Tensor: + """Apply top-k mask to the logits. + + Simplified version of VLLM's implementation for scalar parameters. + This implementation doesn't involve sorting the entire vocab. + + Based on VLLM's implementation: + https://github.com/vllm-project/vllm/blob/34a20c49b3f81f64133428b3a0d62309db1256f9/vllm/v1/sample/ops/topk_topp_sampler.py + SPDX-License-Identifier: Apache-2.0 + Copyright contributors to the vLLM project + + Args: + logits: Input logits tensor of shape [batch_size, seq_len, vocab_size] + top_k: Top-k sampling parameter. 
+ + Returns: + Filtered logits with top-k applied + """ + if top_k >= logits.shape[-1] or top_k == -1: + return logits + + # Get top-k values and create mask + top_k_values, _ = torch.topk(logits, top_k, dim=-1) + threshold = top_k_values[..., -1:].expand_as(logits) + mask = logits >= threshold + + # Apply mask: keep top-k values, set others to -inf + logits = torch.where( + mask, + logits, + torch.tensor(-float("inf"), device=logits.device, dtype=logits.dtype), + ) + return logits + + +def resolve_model_class(model_name: str) -> Any: + """Resolve the appropriate model class for a given model name.""" + if NEMO_AUTOMODEL_AVAILABLE: + return AUTOMODEL_FACTORY.get(model_name.lower(), NeMoAutoModelForCausalLM) + return AUTOMODEL_FACTORY.get(model_name.lower(), AutoModelForCausalLM) + def is_vllm_v1_engine_enabled() -> bool: """Check if vLLM V1 engine is enabled. @@ -140,47 +303,14 @@ def sliding_window_overwrite(model_name: str) -> dict[str, Any]: return overwrite_dict -def configure_expandable_segments() -> None: - """Configure expandable_segments on Hopper and newer architectures (compute capability 9.x+). +def configure_dynamo_cache() -> None: + """Disable dynamo autotune_local_cache. - This helps with memory allocation but causes crashes on Ampere GPUs, so we only enable it - on newer architectures. If PYTORCH_CUDA_ALLOC_CONF is already set, preserves existing values. + Dynamo may fail at cached_autotune when there's already a cache with different order of node_bundles. + Disable autotune_local_cache as a workaround. + See https://github.com/pytorch/pytorch/issues/153791 for more details. 
""" - compute_capability = torch.cuda.get_device_properties(0).major - - if compute_capability >= 9: # Hopper+ - existing_conf = os.environ.get("PYTORCH_CUDA_ALLOC_CONF", "") - - # Check if expandable_segments is already configured - if "expandable_segments" in existing_conf: - print(f"expandable_segments already configured: {existing_conf}") - # Already configured, don't override - return - - # Add expandable_segments to existing configuration - if existing_conf: - # Append to existing configuration - new_conf = f"{existing_conf},expandable_segments:True" - else: - # Set new configuration - new_conf = "expandable_segments:True" - - print(f"Setting PYTORCH_CUDA_ALLOC_CONF to {new_conf}") - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = new_conf - - else: - ## make sure that expandable_segments is not set to True - if "expandable_segments" in os.environ.get("PYTORCH_CUDA_ALLOC_CONF", ""): - conf_items = os.environ["PYTORCH_CUDA_ALLOC_CONF"].split(",") - for item in conf_items: - if item.strip().startswith("expandable_segments"): - key_value = item.split(":") - if len(key_value) == 2 and key_value[1].strip().lower() == "true": - raise RuntimeError( - "expandable_segments is enabled in PYTORCH_CUDA_ALLOC_CONF, " - "but this is not supported on architectures older than Hopper (compute capability < 9). " - "Please set expandable_segments to False." 
- ) + torch._inductor.config.autotune_local_cache = False def get_runtime_env_for_policy_worker(policy_worker_name: str) -> dict[str, Any]: @@ -220,3 +350,171 @@ def get_megatron_checkpoint_dir() -> str: ) print(f"Using default megatron checkpoint dir: {checkpoint_dir}") return checkpoint_dir + + +def get_handle_from_tensor(tensor: torch.Tensor) -> tuple[Any]: + """Get IPC handle from a tensor.""" + from torch.multiprocessing.reductions import reduce_tensor + + # skip serializing the function for better refit performance + return reduce_tensor(tensor.detach())[1:] + + +def calculate_aligned_size(size_bytes: int, alignment: int = 512) -> int: + """Calculate aligned size for memory alignment. + + Args: + size_bytes(int): Size in bytes to align + alignment(int): Alignment boundary in bytes (default 512) + + Returns: + Aligned size in bytes(int). + """ + return int(((size_bytes + alignment - 1) // alignment) * alignment) + + +def stream_weights_via_ipc_zmq_impl( + params_generator, buffer_size_bytes: int, zmq_socket, rank: int, worker_name: str +) -> None: + """Shared implementation for streaming weights via IPC ZMQ with improved memory management. + + Uses ping-pong double buffering to enable overlapping communication while reusing buffers + to reduce memory allocation overhead and improve stability. + + Args: + params_generator: Generator yielding (name, tensor) pairs + buffer_size_bytes: total size of buffer in bytes for batching parameters + zmq_socket: ZMQ socket for communication + rank: Worker rank for logging + worker_name: Name of the worker for logging + """ + # Divide total buffer size by 2 because we use two individual buffers (ping-pong) for overlapping communication. 
+ buffer_size_bytes = buffer_size_bytes // 2 + + def send_buffer_group_overlap(buffer, param_names, used_bytes, await_recv) -> bool: + """Send a group of parameters and return new pending_recv state.""" + # Synchronize before getting IPC handle to ensure data is ready + torch.cuda.current_stream().synchronize() + cuda_ipc_handle = get_handle_from_tensor(buffer) + + if await_recv: + zmq_socket.recv() + + # Payload tuple: (cuda_ipc_handle, param_names, used_bytes) + payload = (cuda_ipc_handle, param_names, used_bytes) + zmq_socket.send_pyobj(payload) + return True # pending_recv = True + + def allocate_buffer(device): + """Allocate a new aligned buffer with proper memory alignment.""" + aligned_size = calculate_aligned_size(buffer_size_bytes) + return torch.empty( + aligned_size, + device=device, + dtype=torch.uint8, + requires_grad=False, + ) + + def pack_tensor(buffer, tensor, used_bytes) -> int: + """Pack tensor into buffer and return new used_bytes.""" + tensor_bytes = tensor.nbytes + buffer[used_bytes : used_bytes + tensor_bytes].data.copy_( + tensor.data.view(-1).view(dtype=torch.uint8), non_blocking=True + ) + return used_bytes + calculate_aligned_size(tensor_bytes) + + # Initialize ping-pong double buffering + buffer_a: torch.Tensor | None = None + buffer_b: torch.Tensor | None = None + current_buffer: torch.Tensor | None = None + + used_bytes = 0 + param_names = [] + await_recv = False + count_of_groups = 0 + + try: + for name, tensor in params_generator: + # Initialize device and buffers on first tensor + if buffer_a is None: + buffer_a = allocate_buffer(tensor.device) + buffer_b = allocate_buffer(tensor.device) + current_buffer = buffer_a + + aligned_size = calculate_aligned_size(tensor.nbytes) + assert aligned_size <= buffer_size_bytes, ( + f"Parameter {name} too large for buffer: {aligned_size} > {buffer_size_bytes}" + ) + + # Check if we need to send current buffer and switch to the other one + if used_bytes + aligned_size > buffer_size_bytes: + 
await_recv = send_buffer_group_overlap( + current_buffer, param_names, used_bytes, await_recv + ) + count_of_groups += 1 + + # Switch buffers for ping-pong double buffering + current_buffer = buffer_b if current_buffer is buffer_a else buffer_a + used_bytes, param_names = 0, [] + + # Pack tensor into current buffer + param_names.append(name) + used_bytes = pack_tensor(current_buffer, tensor, used_bytes) + + # Send remaining tensors + if param_names: + await_recv = send_buffer_group_overlap( + current_buffer, param_names, used_bytes, await_recv + ) + count_of_groups += 1 + + # Complete transmission + if await_recv: + zmq_socket.recv() + + # Final synchronization and completion signal + torch.cuda.current_stream().synchronize() + zmq_socket.send_pyobj(IPCProtocol.COMPLETE) + zmq_socket.recv() + + if rank == 0: + print( + f"{worker_name}: Packed {count_of_groups} groups of tensors", flush=True + ) + + except zmq.Again: + timeout_ms = zmq_socket.getsockopt(zmq.RCVTIMEO) + raise TimeoutError( + f"{worker_name} (rank {rank}): ZMQ communication timeout after {timeout_ms}ms in policy worker side. " + f"The generation worker may be dead or unresponsive. " + f"This typically indicates the generation worker has crashed or is not responding to weight streaming." + ) from None + except zmq.ZMQError as e: + raise RuntimeError( + f"{worker_name} (rank {rank}): ZMQ error during weight streaming: {e} (errno: {e.errno}). " + f"Error details: {e.strerror}. 
" + f"This may indicate network issues or the peer process has terminated unexpectedly.\n" + f"{traceback.format_exc()}" + ) from e + + finally: + # Clean up buffers in finally block to ensure cleanup even on exceptions + if buffer_a is not None: + del buffer_a + if buffer_b is not None: + del buffer_b + + # Force garbage collection and clear CUDA cache + gc.collect() + torch.cuda.empty_cache() + + +def rebuild_cuda_tensor_from_ipc( + cuda_ipc_handle: tuple, device_id: int +) -> torch.Tensor: + """Rebuild a CUDA tensor from an IPC handle.""" + func = rebuild_cuda_tensor + args = cuda_ipc_handle[0] + list_args = list(args) + list_args[6] = device_id + return func(*list_args) diff --git a/nemo_rl/package_info.py b/nemo_rl/package_info.py index 29883366db..f54069691e 100644 --- a/nemo_rl/package_info.py +++ b/nemo_rl/package_info.py @@ -14,7 +14,7 @@ MAJOR = 0 -MINOR = 3 +MINOR = 5 PATCH = 0 PRE_RELEASE = "rc0" diff --git a/nemo_rl/utils/automodel_checkpoint.py b/nemo_rl/utils/automodel_checkpoint.py new file mode 100644 index 0000000000..a9f0793851 --- /dev/null +++ b/nemo_rl/utils/automodel_checkpoint.py @@ -0,0 +1,240 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Checkpoint management utilities for HF models.""" + +import os +from typing import Any, Optional + +import torch +from nemo_automodel.components.checkpoint._backports.filesystem import ( + SerializationFormat, +) + +# Apply torch backports for compatibility with torch==2.7.1 +from nemo_automodel.components.checkpoint._torch_backports import apply_patches + +# Import from nemo-automodel +from nemo_automodel.components.checkpoint.checkpointing import ( + CheckpointingConfig, + load_model, + load_optimizer, + save_model, + save_optimizer, +) + +# Apply torch backports for compatibility with torch==2.7.1 +apply_patches() + + +def _infer_checkpoint_root(weights_path: str) -> str: + """Infer checkpoint root directory from weights path. + + When weights_path ends with "…/weights/model", we need the parent of + the weights directory (the checkpoint root), not the weights directory itself. + + Args: + weights_path: Path to model weights (e.g., "/path/to/policy/weights/model") + + Returns: + str: Checkpoint root directory (e.g., "/path/to/policy") + """ + weights_dir = os.path.dirname(weights_path) + if weights_dir.endswith("weights"): + return os.path.dirname(weights_dir) + return weights_dir + + +def detect_checkpoint_format(weights_path: str) -> tuple[str, bool]: + """Detect model save format and PEFT status from checkpoint directory. 
+ + Args: + weights_path: Path to the checkpoint directory (e.g., weights/model) + + Returns: + tuple: (model_save_format, is_peft) where: + model_save_format is "torch_save" for DCP or "safetensors" for safetensors + is_peft is True if PEFT/adapter patterns are detected + """ + is_peft = False + model_save_format = "safetensors" + try: + # Iterate through all subdirectories and files recursively + all_files = [] + for root, dirs, files in os.walk(weights_path): + all_files.extend(files) + + if any(f.endswith(".distcp") for f in all_files): + model_save_format = "torch_save" + elif any(f.endswith(".safetensors") for f in all_files): + model_save_format = "safetensors" + elif any(f.endswith((".bin", ".pt", ".pth")) for f in all_files): + model_save_format = "torch_save" + + if not is_peft: + is_peft = any("adapter" in f.lower() for f in all_files) + + except (OSError, PermissionError): + pass + + return model_save_format, is_peft + + +def save_checkpoint( + model: torch.nn.Module, + weights_path: str, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[Any] = None, + optimizer_path: Optional[str] = None, + tokenizer: Optional[Any] = None, + tokenizer_path: Optional[str] = None, + model_save_format: str = "safetensors", + is_peft: bool = False, + peft_config: Optional[Any] = None, + save_consolidated: bool = False, + model_state_dict_keys: Optional[list[str]] = None, +) -> None: + """Save a checkpoint of the model and optionally optimizer state. 
+ + Args: + model: The PyTorch model to save + weights_path: Path to save model weights + optimizer: Optional optimizer to save + scheduler: Optional scheduler to save + optimizer_path: Path to save optimizer state (required if optimizer provided) + tokenizer: Optional tokenizer to save + tokenizer_path: Path to save tokenizer state (required if tokenizer provided) + model_save_format: Format for saving model ("torch_save" or "safetensors") + is_peft: Whether the model uses PEFT + peft_config: PEFT configuration if is_peft is True + save_consolidated: Whether to save consolidated checkpoints (for HF compatibility) + model_state_dict_keys: Copy of the model state dict keys before any parallelization. + If None, will be extracted from the model's current state dict. + """ + # Create checkpoint config + + # Extract model state dict keys if not provided + if model_state_dict_keys is None: + model_state_dict_keys = list(model.state_dict().keys()) + + valid_formats = {"safetensors", "torch_save"} + if model_save_format not in valid_formats: + raise ValueError( + f"Unsupported model_save_format='{model_save_format}'. " + f"Expected one of {sorted(valid_formats)}." 
+ ) + + # Ensure target directories exist + os.makedirs(weights_path, exist_ok=True) + if optimizer_path: + os.makedirs(optimizer_path, exist_ok=True) + if tokenizer_path: + os.makedirs(tokenizer_path, exist_ok=True) + + checkpoint_config = CheckpointingConfig( + enabled=True, + checkpoint_dir=_infer_checkpoint_root(weights_path), + model_save_format=model_save_format, + model_cache_dir="", + model_repo_id="", + save_consolidated=save_consolidated, + is_peft=is_peft, + model_state_dict_keys=model_state_dict_keys, + ) + + # Save model using nemo-automodel API + save_model( + model=model, + weights_path=weights_path, + checkpoint_config=checkpoint_config, + peft_config=peft_config, + tokenizer=tokenizer if tokenizer_path is None else None, + ) + + # Save optimizer if provided + if optimizer is not None: + if optimizer_path is None: + raise ValueError( + "optimizer_path must be provided when saving optimizer state" + ) + save_optimizer( + optimizer=optimizer, + model=model, + weights_path=optimizer_path, + scheduler=scheduler, + ) + + # Save tokenizer separately if tokenizer_path provided + if tokenizer is not None and tokenizer_path is not None: + print(f"Saving tokenizer (or processor) to {tokenizer_path}") + tokenizer.save_pretrained(tokenizer_path) + + +def load_checkpoint( + model: torch.nn.Module, + weights_path: str, + optimizer: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[Any] = None, + optimizer_path: Optional[str] = None, +) -> None: + """Load a model weights and optionally optimizer state. 
+ + Args: + model: The PyTorch model whose weights to update + weights_path: Path to load model weights from + optimizer: Optional optimizer to load state into + scheduler: Optional scheduler to load state into + optimizer_path: Path to load optimizer state from (required if optimizer provided) + """ + print(f"Loading weights from {weights_path}") + + model_save_format, is_peft = detect_checkpoint_format(weights_path) + + try: + format_enum = SerializationFormat[model_save_format.upper()] + + # append /model to the weights_path if it doesn't exist + # TODO: remove this once nemo-automodel is updated + if not weights_path.endswith("/model"): + weights_path = os.path.join(weights_path, "model") + + # Load model using nemo-automodel API + load_model( + model=model, + model_path=weights_path, + model_save_format=format_enum, + is_peft=is_peft, + ) + except FileNotFoundError as e: + msg = ( + f"Failed to load model from '{weights_path}': {e}\n" + "Note: DTensorPolicyWorkerV2 expects:\n" + " - Model shards under '<checkpoint_root>/weights/model'\n" + " - Optimizer states under '<checkpoint_root>/optimizer/optim'\n" + "Please verify your checkpoint layout." + ) + raise FileNotFoundError(msg) from e + + if optimizer is not None: + if optimizer_path is None: + raise ValueError( + "optimizer_path must be provided when loading optimizer state" + ) + print(f"Loading optimizer from {optimizer_path}") + load_optimizer( + optimizer=optimizer, + model=model, + weights_path=optimizer_path, + scheduler=scheduler, + ) diff --git a/nemo_rl/utils/checkpoint.py b/nemo_rl/utils/checkpoint.py index 48231d76a8..05e0ee2f3a 100644 --- a/nemo_rl/utils/checkpoint.py +++ b/nemo_rl/utils/checkpoint.py @@ -20,6 +20,7 @@ import glob import json import os +import re import shutil import warnings from pathlib import Path @@ -39,8 +40,15 @@ class CheckpointingConfig(TypedDict): enabled (bool): Whether checkpointing is enabled. checkpoint_dir (PathLike): Directory where checkpoints will be saved. 
metric_name (str | None): Name of the metric to use for determining best checkpoints. + Must be of the form "val:<metric_name>" or "train:<metric_name>" to indicate whether + the metric should be taken from the validation or training metrics. higher_is_better (bool): Whether higher values of the metric indicate better performance. keep_top_k (Optional[int]): Number of best checkpoints to keep. If None, all checkpoints are kept. + model_save_format (str | None): Format for saving model (v2 allowed values: "torch_save" or "safetensors", v1 allowed values: None). + save_consolidated (bool): Whether to save consolidated checkpoints (for HF compatibility). + model_cache_dir (str): Directory for model cache (for safetensors format). + model_repo_id (str): Repository ID for the model (for safetensors format). + is_peft (bool): Whether the model uses PEFT. """ enabled: bool @@ -49,6 +57,14 @@ class CheckpointingConfig(TypedDict): higher_is_better: bool save_period: int keep_top_k: NotRequired[int] + checkpoint_must_save_by: NotRequired[str | None] + # New nemo-automodel integration fields + model_save_format: NotRequired[str | None] # Default: "safetensors" + save_consolidated: NotRequired[bool] # Default: False + model_cache_dir: NotRequired[str] # Default: "" + model_repo_id: NotRequired[str] # Default: "" + is_peft: NotRequired[bool] # Default: False + peft_config: NotRequired[Any] # Default: None class CheckpointManager: @@ -79,10 +95,17 @@ def __init__(self, config: CheckpointingConfig): config (CheckpointingConfig) """ self.checkpoint_dir = Path(config["checkpoint_dir"]) - self.metric_name = config["metric_name"] + self.metric_name: str | None = config["metric_name"] self.higher_is_better = config["higher_is_better"] self.keep_top_k = config["keep_top_k"] + # Store nemo-automodel specific config options + self.model_save_format = config.get("model_save_format", "safetensors") + self.save_consolidated = config.get("save_consolidated", False) + self.model_cache_dir = 
config.get("model_cache_dir", "") + self.model_repo_id = config.get("model_repo_id", "") + self.is_peft = config.get("is_peft", False) + def init_tmp_checkpoint( self, step: int, @@ -112,10 +135,11 @@ def init_tmp_checkpoint( # save training info with open(save_dir / "training_info.json", "w") as f: # make any numpy items serializable - for k, v in training_info.items(): + serializable_training_info = dict(training_info) + for k, v in serializable_training_info.items(): if isinstance(v, torch.Tensor) or isinstance(v, np.ndarray): - training_info[k] = v.item() - json.dump(training_info, f) + serializable_training_info[k] = v.item() + json.dump(serializable_training_info, f) # save config if run_config is not None: @@ -180,26 +204,18 @@ def remove_old_checkpoints(self, exclude_latest: bool = True) -> None: if self.metric_name is None: checkpoint_history.sort(key=lambda x: x[0], reverse=True) else: - try: - assert self.metric_name is not None # Type checker hint - # sort by metric value first, then by step number (for equal metrics, prefer more recent) - if self.higher_is_better: - # For higher_is_better=True: higher metric values first, then higher step numbers - checkpoint_history.sort( - key=lambda x: (x[2][self.metric_name], x[0]), reverse=True - ) - else: - # For higher_is_better=False: lower metric values first, then higher step numbers for equal values - checkpoint_history.sort( - key=lambda x: (x[2][self.metric_name], -x[0]) - ) - except KeyError: - warnings.warn( - f"Metric {self.metric_name} not found in checkpoint history. Keeping most recent k checkpoints." 
+ # sort by metric value first, then by step number (for equal metrics, prefer more recent) + if self.higher_is_better: + # For higher_is_better=True: higher metric values first, then higher step numbers + checkpoint_history.sort( + key=lambda x: (x[2].get(self.metric_name, -float("inf")), x[0]), + reverse=True, + ) + else: + # For higher_is_better=False: lower metric values first, then higher step numbers for equal values + checkpoint_history.sort( + key=lambda x: (x[2].get(self.metric_name, float("inf")), -x[0]) ) - checkpoint_history.sort(key=lambda x: x[0], reverse=True) - - self.metric_name = None # remove checkpoints that are not in the top-k for checkpoint in checkpoint_history[self.keep_top_k :]: @@ -243,7 +259,11 @@ def get_latest_checkpoint_path(self) -> Optional[str]: Optional[str]: Path to the latest checkpoint, or None if no checkpoints exist. """ # find checkpoint directory with highest step number - step_dirs = glob.glob(str(self.checkpoint_dir / "step_*")) + step_dirs = [ + x + for x in glob.glob(str(self.checkpoint_dir / "step_*")) + if re.fullmatch(r"step_\d+", Path(x).name) + ] step_dirs.sort(key=lambda x: int(Path(x).name.split("_")[1])) if len(step_dirs) == 0: return None @@ -283,7 +303,11 @@ def _load_checkpoint_history( checkpoint_history: list[tuple[int, PathLike, dict[str, Any]]] = [] # Find all step directories - step_dirs = glob.glob(str(checkpoint_dir / "step_*")) + step_dirs = [ + x + for x in glob.glob(str(checkpoint_dir / "step_*")) + if re.fullmatch(r"step_\d+", Path(x).name) + ] for step_dir in step_dirs: info_file = Path(step_dir) / "training_info.json" diff --git a/nemo_rl/utils/flops_formulas.py b/nemo_rl/utils/flops_formulas.py new file mode 100644 index 0000000000..fe55e3b4cb --- /dev/null +++ b/nemo_rl/utils/flops_formulas.py @@ -0,0 +1,544 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import dataclass +from typing import List, Optional, Union + + +# lifted from NeMo/nemo/utils/flops_formulas.py +@dataclass +class FLOPSConfig: + """Contains the model hparams needed for FLOPS computations.""" + + gbs: int + enc_seq_len: Optional[int] = None + hs: Optional[int] = None + layers: Optional[int] = None + ffn_hs: Optional[int] = None + attention_heads: Optional[int] = None + moe_router_topk: Optional[int] = None + query_groups: Optional[int] = None + img_seq_len: Optional[int] = None + img_h: Optional[int] = None + img_w: Optional[int] = None + in_channels: Optional[int] = None + patch_dim: Optional[int] = None + class_token_len: Optional[int] = None + projector_type: Optional[str] = None + inp_s: Optional[int] = None + model_pattern: Optional[str] = None + vocab_size: Optional[int] = None + model_channels: Optional[int] = None + vec_in_dim: Optional[int] = None + q_lora_rank: Optional[int] = None + kv_lora_rank: Optional[int] = None + qk_head_dim: Optional[int] = None + qk_pos_emb_head_dim: Optional[int] = None + v_head_dim: Optional[int] = None + moe_layer_freq: Optional[Union[int, List[int]]] = None + moe_shared_expert_intermediate_size: Optional[int] = None + moe_ffn_hidden_size: Optional[int] = None + mtp_num_layers: Optional[int] = None + causal_self_attn: Optional[bool] = None + is_hybrid_model: bool = False + hybrid_override_pattern: Optional[str] = None + mamba_state_dim: 
Optional[int] = None + mamba_head_dim: Optional[int] = None + mamba_num_groups: Optional[int] = None + mamba_num_heads: Optional[int] = None + + +def gpt3(config: FLOPSConfig): + """Model FLOPs for GPT3 family.""" + return ( + 24 * config.gbs * config.enc_seq_len * config.hs * config.hs + + 4 * config.gbs * config.enc_seq_len * config.enc_seq_len * config.hs + ) * (3 * config.layers) + ( + 6 * config.gbs * config.enc_seq_len * config.hs * config.vocab_size + ) + + +def llama(config: FLOPSConfig): + """Model FLOPs for llama3 family.""" + return ( + config.gbs + * config.enc_seq_len + * config.layers + * config.hs + * config.hs + * ( + 12 + + (12 * config.query_groups / config.attention_heads) + + (18 * config.ffn_hs / config.hs) + + (6 * config.enc_seq_len / config.hs) + + (6 * config.vocab_size / (config.layers * config.hs)) + ) + ) + + +def nemotron(config: FLOPSConfig): + """Model FLOPs for nemotron family.""" + return ( + config.gbs + * config.enc_seq_len + * config.layers + * config.hs + * config.hs + * ( + 12 + + (12 * config.query_groups / config.attention_heads) + + (12 * config.ffn_hs / config.hs) + + (12 * config.enc_seq_len / config.hs) + + (6 * config.vocab_size / (config.layers * config.hs)) + ) + ) + + +def mixtral(config: FLOPSConfig): + """Model FLOPs for mixtral family.""" + return ( + config.gbs + * config.enc_seq_len + * config.layers + * config.hs + * config.hs + * ( + 12 + + (12 * config.query_groups / config.attention_heads) + + (18 * config.moe_router_topk * config.ffn_hs / config.hs) + + (12 * config.enc_seq_len / config.hs) + + (6 * config.vocab_size / (config.layers * config.hs)) + ) + ) + + +def qwen2(config: FLOPSConfig): + """Model FLOPs for Qwen2 family.""" + causal_self_attn = True + seq_len = config.enc_seq_len + hidden_size = config.hs + gated_linear_multiplier = 2 + + # attention flops for GQA + attention_flops = ( + 3 + * 2 + * config.gbs + * config.layers + * seq_len + * hidden_size + * hidden_size + * ( + (2 + 1) # QKV gemm + + ( 
+ seq_len / hidden_size * 2 * (0.5 if causal_self_attn else 1) + ) # attention + + 1 # attention proj gemm + ) + ) + + # mlp flops + mlp_flops = ( + 3 + * 2 + * config.gbs + * config.layers + * seq_len + * hidden_size + * (1 + gated_linear_multiplier) + * config.ffn_hs + ) + + # vocab flops + vocab_flops = 3 * 2 * config.gbs * seq_len * hidden_size * config.vocab_size + + return attention_flops + mlp_flops + vocab_flops + + +def qwen3(config: FLOPSConfig): + """Model FLOPs for Qwen3 family.""" + causal_self_attn = True + seq_len = config.enc_seq_len + hidden_size = config.hs + gated_linear_multiplier = 2 + + # attention flops for GQA + attention_flops = ( + 3 + * 2 + * config.gbs + * config.layers + * seq_len + * hidden_size + * hidden_size + * ( + (config.query_groups / config.attention_heads * 2 + 1) # QKV gemm + + ( + seq_len / hidden_size * 2 * (0.5 if causal_self_attn else 1) + ) # attention + + 1 # attention proj gemm + ) + ) + + # mlp flops + mlp_flops = ( + 3 + * 2 + * config.gbs + * config.layers + * seq_len + * hidden_size + * (1 + gated_linear_multiplier) + * (config.moe_ffn_hidden_size * config.moe_router_topk) # MoE layers + ) + + # vocab flops + vocab_flops = 3 * 2 * config.gbs * seq_len * hidden_size * config.vocab_size + + return attention_flops + mlp_flops + vocab_flops + + +def bert(config: FLOPSConfig): + """Model FLOPs for BERT family.""" + return ( + 72 + * config.gbs + * config.layers + * config.enc_seq_len + * config.hs + * config.hs + * ( + 1 + + (config.enc_seq_len / (6 * config.hs)) + + (config.vocab_size / (12 * config.hs * config.layers)) + ) + ) + + +def transformer(config: FLOPSConfig): + """Calculate FLOPs for a standard Transformer model. + + Note: This does not cover encoder-decoder models. 
+ """ + # Extract parameters from config + batch_size = config.gbs + hidden_size = config.hs + seq_length = config.enc_seq_len + num_layers = config.layers + num_attention_heads = config.attention_heads + ffn_hidden_size = config.ffn_hs + vocab_size = config.vocab_size + + if vocab_size is None: + raise ValueError("vocab_size is required for transformer FLOPs calculation") + + # Handle optional parameters with reasonable defaults + query_groups = ( + config.query_groups if config.query_groups is not None else num_attention_heads + ) + causal_self_attn = ( + config.causal_self_attn if config.causal_self_attn is not None else False + ) + moe_router_topk = ( + config.moe_router_topk if config.moe_router_topk is not None else 0 + ) + kv_channels = hidden_size // num_attention_heads # Standard dimension per head + + # Calculate query projection size and ratio + query_projection_size = kv_channels * num_attention_heads + query_projection_to_hidden_size_ratio = query_projection_size / hidden_size + + # MoE parameters - simplified for NeMo config + # In this implementation, we assume all layers are dense if num_experts is None + if moe_router_topk == 0: + num_dense_layers = num_layers + num_moe_layers = 0 + num_experts_routed_to = 0 + else: + # Simplified MoE handling - assuming uniform distribution of MoE layers + # This can be expanded based on NeMo's actual MoE implementation + num_moe_layers = num_layers // 2 # Simplified assumption + num_dense_layers = num_layers - num_moe_layers + num_experts_routed_to = moe_router_topk + + # Handle SwiGLU vs standard GELU/ReLU + # Default to standard activation (no SwiGLU) + gated_linear_multiplier = 1 + + # Define the expansion factor as described in the paper + # 3x: Each GEMM needs forward pass, backward wgrad, and backward dgrad + # 2x: GEMMs are stacked twice in standard Transformer architectures + # 2x: A GEMM of m*n with n*k requires 2mnk floating-point operations + expansion_factor = 3 * 2 * 2 + # Attention + if not 
causal_self_attn: + attention_component = ( + 1 + + (query_groups / num_attention_heads) + # Only half of the attention matrix is non-zero and needs to be multiplied with V + + (seq_length / hidden_size) # If causal self attn -> divide by 2. + ) * query_projection_to_hidden_size_ratio + else: + attention_component = ( + 1 + + (query_groups / num_attention_heads) + # Only half of the attention matrix is non-zero and needs to be multiplied with V + + (seq_length / hidden_size / 2) # If causal self attn -> divide by 2. + ) * query_projection_to_hidden_size_ratio + + # Calculate total FLOPs + total_flops = ( + expansion_factor + * batch_size + * seq_length + * num_layers + * hidden_size + * hidden_size + * ( + attention_component + # MLP component + + ( + ( + # Dense layers + (ffn_hidden_size * num_dense_layers) + + + # MoE layers + ( + ( + # Routed experts + ffn_hidden_size * num_experts_routed_to + # Note: Shared experts are not implemented in this version + ) + * num_moe_layers + ) + ) + * gated_linear_multiplier + / (num_layers * hidden_size) + ) + # Logit component + + (vocab_size / (2 * num_layers * hidden_size)) + ) + ) + + return total_flops + + +def flux(config: FLOPSConfig): + """Model FLOPs for FLUX.""" + hs = config.hs + seq_len = config.model_channels + config.inp_s + base_factor = 6 * config.gbs # common multiplier for most terms + + # Joint layer computations + joint_layer_flops = ( + base_factor + * config.layers[0] + * ( + 10 * hs * hs # hidden size operations + + 2 + * hs + * (config.model_channels + config.inp_s) + * (1 + hs * 7) # channel and context joint attention + + 2 * (config.model_channels + config.inp_s) * hs # final projection + ) + ) + + # Single layer computations + single_layer_flops = ( + base_factor + * config.layers[1] + * seq_len + * hs + * ( + 3 # linear Y + + 1 # Modulation + + 4 * hs # Linear computations + + (3 * hs + 2 * seq_len) # attention operations + + 5 * hs # feed-forward + + 1 # Modulation + ) + ) + + # Embedding and 
projection layers + other_flops = base_factor * ( + config.inp_s * config.in_channels * hs # image embedding + + config.inp_s * hs * config.model_channels # text embedding + + config.vec_in_dim * hs + + hs * hs # vector embedding + + 2 * (config.model_channels * hs + hs * hs) # guidance + timestep embedding + + (config.inp_s * config.in_channels * hs) / config.gbs # final projection + ) + + return joint_layer_flops + single_layer_flops + other_flops + + +def deepseekv3(config: FLOPSConfig): + """Model FLOPs for DeepSeek V3.""" + # self-attention flops + bmm1_flops = ( + 0.5 + * (config.qk_head_dim + config.qk_pos_emb_head_dim) + * config.attention_heads + * (config.enc_seq_len**2) + ) + bmm2_flops = ( + 0.5 * config.v_head_dim * config.attention_heads * (config.enc_seq_len**2) + ) + per_input_attention_flops = 6 * (bmm1_flops + bmm2_flops) * config.layers + if config.mtp_num_layers is not None: + per_input_attention_flops += ( + 6 * (bmm1_flops + bmm2_flops) * config.mtp_num_layers + ) + + # linear layer flops + # Q projection: check if using MLA (q_lora_rank is set) or standard attention + if config.q_lora_rank is not None: + # MLA for Q (e.g., DeepSeek-V3) + per_layer_mla_params = config.hs * config.q_lora_rank + config.q_lora_rank * ( + (config.qk_head_dim + config.qk_pos_emb_head_dim) * config.attention_heads + ) # Q + else: + # Standard attention for Q (e.g., Moonlight) + per_layer_mla_params = config.hs * ( + (config.qk_head_dim + config.qk_pos_emb_head_dim) * config.attention_heads + ) # Q + + per_layer_mla_params += config.hs * config.qk_pos_emb_head_dim # K^R + per_layer_mla_params += config.hs * config.kv_lora_rank + config.kv_lora_rank * ( + (config.qk_head_dim + config.v_head_dim) * config.attention_heads + ) # K^C and V^C + per_layer_mla_params += ( + config.v_head_dim * config.attention_heads * config.hs + ) # Proj + mla_params = per_layer_mla_params * config.layers + if config.mtp_num_layers is not None: + mla_params += per_layer_mla_params * 
config.mtp_num_layers + + dense_layer_ffn_params = config.hs * config.ffn_hs * 3 # gated linear unit + per_shared_expert_params = ( + config.hs * config.moe_shared_expert_intermediate_size * 3 + ) + per_selected_expert_params = config.hs * config.moe_ffn_hidden_size * 3 + ffn_params = 0 + + if isinstance(config.moe_layer_freq, int): + moe_layer_pattern = [ + 1 if (i % config.moe_layer_freq == 0) else 0 for i in range(config.layers) + ] + else: + moe_layer_pattern = config.moe_layer_freq + for i in moe_layer_pattern: + if i == 0: + ffn_params += dense_layer_ffn_params + else: + ffn_params += per_shared_expert_params + ( + per_selected_expert_params * config.moe_router_topk + ) + if config.mtp_num_layers is not None: + for i in range(config.mtp_num_layers): + ffn_params += per_shared_expert_params + ( + per_selected_expert_params * config.moe_router_topk + ) + per_input_params = mla_params + ffn_params + per_input_linear_flops = 6 * per_input_params * config.enc_seq_len + + # vocab flops + per_input_vocab_flops = 6 * config.vocab_size * config.hs * config.enc_seq_len + if config.mtp_num_layers is not None: + for i in range(config.mtp_num_layers): + per_input_vocab_flops += ( + 6 * config.vocab_size * config.hs * config.enc_seq_len + ) + per_input_vocab_flops += 6 * config.hs * 2 * config.hs * config.enc_seq_len + + return ( + per_input_attention_flops + per_input_linear_flops + per_input_vocab_flops + ) * config.gbs + + +def _mlp_layer_flops(config: FLOPSConfig): + """Model FLOPs for MLP layer.""" + return ( + 6 + * config.gbs + * config.enc_seq_len + * config.hs + * config.ffn_hs + * (2 if config.gated_linear_unit else 1) + ) + + +def _non_mla_attn_layer_flops(config: FLOPSConfig): + """Model FLOPs for attention layer.""" + return ( + 6 + * config.gbs + * config.enc_seq_len + * config.hs + * ( + config.hs # Q + + config.query_groups / config.attention_heads * config.hs * 2 # KV + + config.enc_seq_len / 2 * 2 + + config.hs + ) + ) + + +def _mamba_layer_flops(config: 
FLOPSConfig): + """Model FLOPs for Mamba layer. We ignore part of the flops of scan because the chunk size is not known from model config.""" + assert config.mamba_state_dim is not None + assert config.mamba_head_dim is not None + + if config.mamba_num_heads: + nheads = config.mamba_num_heads + else: + nheads = 2 * config.hs // config.mamba_head_dim # default expand is 2 + d_in = nheads * config.mamba_head_dim + return ( + ( + 6 + * config.gbs + * config.enc_seq_len + * config.hs + * (2 * d_in + 2 * config.mamba_num_groups * config.mamba_state_dim + nheads) + ) + + (3 * 2 * config.gbs * config.enc_seq_len * d_in * config.mamba_state_dim) + + (6 * config.gbs * config.enc_seq_len * d_in * config.hs) + ) + + +def _hybrid_model_flops(config: FLOPSConfig): + """Model FLOPs for hybrid model.""" + assert config.is_hybrid_model == True + assert config.hybrid_override_pattern is not None + + num_attn_layers, num_mamba_layers, num_mlp_layers = 0, 0, 0 + for c in config.hybrid_override_pattern: + if c == "M": + num_mamba_layers += 1 + elif c == "-": + num_mlp_layers += 1 + elif c == "*": + num_attn_layers += 1 + return ( + num_attn_layers * _non_mla_attn_layer_flops(config) + + num_mamba_layers * _mamba_layer_flops(config) + + num_mlp_layers * _mlp_layer_flops(config) + + 6 * config.gbs * config.enc_seq_len * config.hs * config.vocab_size + ) + + +def nemotronh(config: FLOPSConfig): + """Model FLOPs for NemotronH.""" + return _hybrid_model_flops(config) diff --git a/nemo_rl/utils/flops_tracker.py b/nemo_rl/utils/flops_tracker.py new file mode 100644 index 0000000000..5bf462b2cb --- /dev/null +++ b/nemo_rl/utils/flops_tracker.py @@ -0,0 +1,183 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from dataclasses import asdict +from typing import Callable, Optional + +import torch +from packaging.version import Version as PkgVersion +from transformers import AutoConfig +from transformers.configuration_utils import PretrainedConfig +from transformers.models.llama.configuration_llama import LlamaConfig +from transformers.models.qwen2.configuration_qwen2 import Qwen2Config +from transformers.models.qwen3.configuration_qwen3 import Qwen3Config +from transformers.models.qwen3_moe.configuration_qwen3_moe import Qwen3MoeConfig + +from nemo_rl.models.policy.utils import sliding_window_overwrite +from nemo_rl.utils.flops_formulas import FLOPSConfig, deepseekv3, llama, qwen2, qwen3 + + +def get_default_hf_config(model_name: str) -> PretrainedConfig: + """Get the default Hugging Face config for a model. + + Both the DTensor and MCore paths use the same default config, we initialize the model config + here to allow computation of theoretical flops which is agnostic to the backend. 
+ """ + return AutoConfig.from_pretrained( + model_name, + torch_dtype=torch.float32, + trust_remote_code=True, + **sliding_window_overwrite(model_name), + ) + + +def convert_config_to_flops_config( + config: PretrainedConfig, +) -> tuple[FLOPSConfig, Callable]: + """Convert a pretrained config to a tuple containing a FLOPSConfig and a flops formula.""" + if isinstance(config, Qwen2Config): + return FLOPSConfig( + gbs=0, + hs=config.hidden_size, + layers=config.num_hidden_layers, + ffn_hs=config.intermediate_size, + vocab_size=config.vocab_size, + ), qwen2 + elif isinstance(config, (Qwen3Config, Qwen3MoeConfig)): + return FLOPSConfig( + gbs=0, + hs=config.hidden_size, + layers=config.num_hidden_layers, + ffn_hs=config.intermediate_size, + vocab_size=config.vocab_size, + query_groups=config.num_key_value_heads, + attention_heads=config.num_attention_heads, + # for non-MoE models, we use the intermediate size as the ffn hidden size + moe_ffn_hidden_size=config.intermediate_size, + moe_router_topk=1, + ), qwen3 + elif isinstance(config, LlamaConfig): + return FLOPSConfig( + gbs=0, + hs=config.hidden_size, + layers=config.num_hidden_layers, + ffn_hs=config.intermediate_size, + query_groups=config.num_key_value_heads, + attention_heads=config.num_attention_heads, + vocab_size=config.vocab_size, + ), llama + elif config.__class__.model_type == "deepseek_v3": + return FLOPSConfig( + gbs=0, + hs=config.hidden_size, + layers=config.num_hidden_layers, + ffn_hs=config.intermediate_size, + attention_heads=config.num_attention_heads, + moe_router_topk=config.num_experts_per_tok, + query_groups=config.num_key_value_heads, + vocab_size=config.vocab_size, + q_lora_rank=config.q_lora_rank, + kv_lora_rank=config.kv_lora_rank, + qk_head_dim=config.qk_nope_head_dim, + qk_pos_emb_head_dim=config.qk_rope_head_dim, + v_head_dim=config.v_head_dim, + moe_layer_freq=1, + moe_shared_expert_intermediate_size=config.moe_intermediate_size, + moe_ffn_hidden_size=config.moe_intermediate_size, + 
mtp_num_layers=0, + causal_self_attn=True, + ), deepseekv3 + else: + raise ValueError(f"Unsupported config type: {type(config)}") + + +def is_using_tf32() -> bool: + """Check if the current device is using TF32.""" + if PkgVersion(torch.__version__) < PkgVersion("2.9.0a0"): + return torch.backends.cuda.matmul.allow_tf32 + else: + return torch.backends.cuda.matmul.fp32_precision == "tf32" + + +THEORETICAL_TFLOPS = { + ("NVIDIA A100 80GB PCIe", torch.bfloat16): 624 / 2, + ("NVIDIA A100 80GB PCIe", torch.float32): 312 / 2 if is_using_tf32() else 19.5, + ("NVIDIA H100 80GB HBM3", torch.bfloat16): 1979 / 2, + ("NVIDIA H100 80GB HBM3", torch.float32): 989 / 2 if is_using_tf32() else 67.0, + ("NVIDIA B200", torch.bfloat16): 4500 / 2, + ("NVIDIA B200", torch.float32): 2200 / 2 if is_using_tf32() else 80.0, + ("NVIDIA B300", torch.bfloat16): 4500 / 2, + ("NVIDIA B300", torch.float32): 2200 / 2 if is_using_tf32() else 80.0, + ("NVIDIA GB200", torch.bfloat16): 4900 / 2, + ("NVIDIA GB200", torch.float32): 2500 / 2 if is_using_tf32() else 80.0, + ("NVIDIA GB300", torch.bfloat16): 4900 / 2, + ("NVIDIA GB300", torch.float32): 2500 / 2 if is_using_tf32() else 80.0, +} + + +def get_theoretical_tflops(device_name: str, model_dtype: torch.dtype) -> float: + """Get the theoretical total flops for a device name.""" + if (device_name, model_dtype) in THEORETICAL_TFLOPS: + return THEORETICAL_TFLOPS[(device_name, model_dtype)] + else: + raise ValueError( + f"Unknown device name: {device_name} and dtype name: {model_dtype}" + ) + + +class FLOPTracker: + def __init__( + self, + model_name: str, + base_config: FLOPSConfig | None = None, + flops_formula: Callable[[FLOPSConfig], float] | None = None, + ): + self.model_name = model_name + self.base_config = base_config + self.total_flops = 0 + self.flops_formula: Optional[Callable[[FLOPSConfig], float]] = flops_formula + + @classmethod + def from_config(cls, model_name: str, config: PretrainedConfig) -> "FLOPTracker": + flops_config, 
flops_formula = convert_config_to_flops_config(config) + return cls( + model_name=model_name, base_config=flops_config, flops_formula=flops_formula + ) + + def track(self, n_samples: int, padded_seq_len: int): + if self.flops_formula is None: + raise ValueError("Flops formula is not set") + + base_config_dict = ( + asdict(self.base_config) if self.base_config is not None else {} + ) + + # Override gbs and enc_seq_len with current values + config_dict = { + **base_config_dict, + "gbs": n_samples, + "enc_seq_len": padded_seq_len, + } + + # Compute and accumulate flops + flops = self.flops_formula(FLOPSConfig(**config_dict)) + self.total_flops += flops + + def track_batch(self, sequence_lengths: list[int]): + """Track the flops for a batch of sequences.""" + for seq_len in sequence_lengths: + self.track(n_samples=1, padded_seq_len=seq_len) + + def reset(self): + self.total_flops = 0 diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py index 4cf2621cd4..eef08b2e70 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -28,6 +28,7 @@ import mlflow import ray import requests +import swanlab import torch import wandb from matplotlib import pyplot as plt @@ -51,6 +52,11 @@ class WandbConfig(TypedDict): name: NotRequired[str] +class SwanlabConfig(TypedDict): + project: NotRequired[str] + name: NotRequired[str] + + class TensorboardConfig(TypedDict): log_dir: NotRequired[str] @@ -59,6 +65,7 @@ class MLflowConfig(TypedDict): experiment_name: str run_name: str tracking_uri: NotRequired[str] + artifact_location: NotRequired[str | None] class GPUMonitoringConfig(TypedDict): @@ -69,14 +76,16 @@ class GPUMonitoringConfig(TypedDict): class LoggerConfig(TypedDict): log_dir: str wandb_enabled: bool + swanlab_enabled: bool tensorboard_enabled: bool mlflow_enabled: bool wandb: WandbConfig - tensorboard: TensorboardConfig + tensorboard: NotRequired[TensorboardConfig] + swanlab: NotRequired[SwanlabConfig] mlflow: NotRequired[MLflowConfig] monitor_gpus: bool 
gpu_monitoring: GPUMonitoringConfig - num_val_samples_to_print: int + num_val_samples_to_print: NotRequired[int] class LoggerInterface(ABC): @@ -122,9 +131,26 @@ def log_metrics( step_metric: Optional step metric name (ignored in TensorBoard) """ for name, value in metrics.items(): + # Penguin will add additional metrics like wandb histograms. However, some people will log to Tensorboard instead which may not be compatible + # This logic catches non-compatible objects being logged. + if not isinstance(value, (int, float, bool, str)): + continue + if prefix: name = f"{prefix}/{name}" - self.writer.add_scalar(name, value, step) + + # Skip non-scalar values that TensorBoard can't handle + if isinstance(value, (dict, list)): + print( + f"Warning: Skipping non-scalar metric '{name}' for TensorBoard logging (type: {type(value).__name__})" + ) + continue + + try: + self.writer.add_scalar(name, value, step) + except Exception as e: + print(f"Warning: Failed to log metric '{name}' to TensorBoard: {e}") + continue def log_hyperparams(self, params: Mapping[str, Any]) -> None: """Log hyperparameters to Tensorboard. @@ -324,6 +350,75 @@ def log_plot(self, figure: plt.Figure, step: int, name: str) -> None: self.run.log({name: figure}, step=step) +class SwanlabLogger(LoggerInterface): + """SwanLab logger backend.""" + + def __init__(self, cfg: SwanlabConfig, log_dir: Optional[str] = None): + self.run = swanlab.init(**cfg, logdir=log_dir) + print( + f"Initialized SwanlabLogger for project {cfg.get('project')}, run {cfg.get('name')} (with offline logdir={log_dir})" + ) + + def define_metric( + self, + name: str, + step_metric: Optional[str] = None, + ) -> None: + """Define a metric with custom step metric. + + Args: + name: Name of the metric or pattern (e.g. 
'ray/*') + step_metric: Optional name of the step metric to use + """ + self.run.define_metric(name, step_metric=step_metric) + + def log_metrics( + self, + metrics: dict[str, Any], + step: int, + prefix: Optional[str] = "", + step_metric: Optional[str] = None, + ) -> None: + """Log metrics to swanlab. + + Args: + metrics: Dict of metrics to log + step: Global step value + prefix: Optional prefix for metric names + step_metric: Optional name of a field in metrics to use as step instead + of the provided step value + """ + if prefix: + metrics = { + f"{prefix}/{k}" if k != step_metric else k: v + for k, v in metrics.items() + } + + # If step_metric is provided, use the corresponding value from metrics as step + if step_metric and step_metric in metrics: + # commit=False so the step does not get incremented + self.run.log(metrics, commit=False) + else: + self.run.log(metrics, step=step) + + def log_hyperparams(self, params: Mapping[str, Any]) -> None: + """Log hyperparameters to swanlab. + + Args: + params: Dict of hyperparameters to log + """ + self.run.config.update(params) + + def log_plot(self, figure: plt.Figure, step: int, name: str) -> None: + """Log a plot to swanlab. 
+ + Args: + figure: Matplotlib figure to log + step: Global step value + """ + self.run.log({name: figure}, step=step) + + class GpuMetricSnapshot(TypedDict): step: int metrics: dict[str, Any] @@ -630,31 +725,30 @@ def __init__(self, cfg: MLflowConfig, log_dir: Optional[str] = None): Args: cfg: MLflow configuration - log_dir: Optional log directory + log_dir: Optional log directory (used as fallback if artifact_location not in cfg) """ - if cfg["tracking_uri"]: - mlflow.set_tracking_uri(cfg["tracking_uri"]) + tracking_uri = cfg.get("tracking_uri") + if tracking_uri: + mlflow.set_tracking_uri(tracking_uri) - experiment = mlflow.get_experiment_by_name(cfg["experiment_name"]) + experiment_name = cfg["experiment_name"] + experiment = mlflow.get_experiment_by_name(experiment_name) if experiment is None: - if log_dir: - mlflow.create_experiment( - name=cfg["experiment_name"], - artifact_location=log_dir, - ) - else: - mlflow.create_experiment(cfg["experiment_name"]) + mlflow.create_experiment( + name=experiment_name, + **{"artifact_location": cfg.get("artifact_location", log_dir)} + if "artifact_location" in cfg or log_dir + else {}, + ) else: - mlflow.set_experiment(cfg["experiment_name"]) + mlflow.set_experiment(experiment_name) # Start run - run_kwargs: dict[str, str] = {} - run_kwargs["run_name"] = cfg["run_name"] - + run_name = cfg["run_name"] + run_kwargs = {"run_name": run_name} self.run = mlflow.start_run(**run_kwargs) print( - f"Initialized MLflowLogger for experiment {cfg['experiment_name']}, " - f"run {cfg['run_name']}" + f"Initialized MLflowLogger for experiment {experiment_name}, run {run_name}" ) def log_metrics( @@ -727,6 +821,7 @@ def __init__(self, cfg: LoggerConfig): """ self.loggers: list[LoggerInterface] = [] self.wandb_logger = None + self.swanlab_logger = None self.base_log_dir = cfg["log_dir"] os.makedirs(self.base_log_dir, exist_ok=True) @@ -737,6 +832,12 @@ def __init__(self, cfg: LoggerConfig): self.wandb_logger = WandbLogger(cfg["wandb"], 
log_dir=wandb_log_dir) self.loggers.append(self.wandb_logger) + if cfg["swanlab_enabled"]: + swanlab_log_dir = os.path.join(self.base_log_dir, "swanlab") + os.makedirs(swanlab_log_dir, exist_ok=True) + self.swanlab_logger = SwanlabLogger(cfg["swanlab"], log_dir=swanlab_log_dir) + self.loggers.append(self.swanlab_logger) + if cfg["tensorboard_enabled"]: tensorboard_log_dir = os.path.join(self.base_log_dir, "tensorboard") os.makedirs(tensorboard_log_dir, exist_ok=True) @@ -746,8 +847,10 @@ def __init__(self, cfg: LoggerConfig): self.loggers.append(tensorboard_logger) if cfg["mlflow_enabled"]: - mlflow_log_dir = os.path.join(self.base_log_dir, "mlflow") - os.makedirs(mlflow_log_dir, exist_ok=True) + mlflow_log_dir = self.base_log_dir + if mlflow_log_dir: + mlflow_log_dir = os.path.join(mlflow_log_dir, "mlflow") + os.makedirs(mlflow_log_dir, exist_ok=True) mlflow_logger = MLflowLogger(cfg["mlflow"], log_dir=mlflow_log_dir) self.loggers.append(mlflow_logger) @@ -761,6 +864,11 @@ def __init__(self, cfg: LoggerConfig): f"{metric_prefix}/*", step_metric=step_metric ) + if cfg["swanlab_enabled"] and self.swanlab_logger: + self.swanlab_logger.define_metric( + f"{metric_prefix}/*", step_metric=step_metric + ) + self.gpu_monitor = RayGpuMonitorLogger( collection_interval=cfg["gpu_monitoring"]["collection_interval"], flush_interval=cfg["gpu_monitoring"]["flush_interval"], diff --git a/nemo_rl/utils/native_checkpoint.py b/nemo_rl/utils/native_checkpoint.py index 8d21aae7d3..8e4d8fd017 100644 --- a/nemo_rl/utils/native_checkpoint.py +++ b/nemo_rl/utils/native_checkpoint.py @@ -152,6 +152,8 @@ def save_checkpoint( optimizer: Optional optimizer to save scheduler: Optional scheduler to save optimizer_path: Path to save optimizer state (required if optimizer provided) + tokenizer: Optional tokenizer to save + tokenizer_path: Path to save tokenizer state (required if tokenizer provided) """ model_state = {"model": ModelState(model)} dcp.save(model_state, checkpoint_id=weights_path) @@ 
-169,6 +171,7 @@ def save_checkpoint( raise ValueError( "tokenizer_path must be provided when saving tokenizer state" ) + print(f"Saving tokenizer (or processor) to {tokenizer_path}") tokenizer.save_pretrained(tokenizer_path) @@ -208,6 +211,7 @@ def convert_dcp_to_hf( model_name_or_path: str, tokenizer_name_or_path: str, overwrite: bool = False, + hf_overrides: Optional[dict[str, Any]] = {}, ) -> str: """Convert a Torch DCP checkpoint to a Hugging Face checkpoint. @@ -244,7 +248,9 @@ def convert_dcp_to_hf( ) torch.save(state_dict["model"], weights_path) - config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) + config = AutoConfig.from_pretrained( + model_name_or_path, trust_remote_code=True, **hf_overrides + ) config.save_pretrained(hf_ckpt_path) # TODO: After the following PR gets merged: diff --git a/nemo_rl/utils/nsys.py b/nemo_rl/utils/nsys.py index b5609f8c41..d9282970ab 100644 --- a/nemo_rl/utils/nsys.py +++ b/nemo_rl/utils/nsys.py @@ -16,6 +16,7 @@ from typing import Protocol import rich +import torch NRL_NSYS_WORKER_PATTERNS = os.environ.get("NRL_NSYS_WORKER_PATTERNS", "") NRL_NSYS_PROFILE_STEP_RANGE = os.environ.get("NRL_NSYS_PROFILE_STEP_RANGE", "") @@ -76,3 +77,18 @@ def stop_profiler_on_exit(): ) policy.stop_gpu_profiling() policy.__NRL_PROFILE_STARTED = False + + +def wrap_with_nvtx_name(name: str): + """A decorator to wrap a function with an NVTX range with the given name.""" + + def decorator(func): + def wrapper(*args, **kwargs): + torch.cuda.nvtx.range_push(name) + ret = func(*args, **kwargs) + torch.cuda.nvtx.range_pop() + return ret + + return wrapper + + return decorator diff --git a/nemo_rl/utils/packed_tensor.py b/nemo_rl/utils/packed_tensor.py new file mode 100644 index 0000000000..681a0dcc00 --- /dev/null +++ b/nemo_rl/utils/packed_tensor.py @@ -0,0 +1,203 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +from functools import lru_cache +from typing import Any, List, Tuple + +import torch + + +@lru_cache(maxsize=1) +def get_target_packed_tensor_size(): + memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.02") + device = torch.device("cuda") + props = torch.cuda.get_device_properties(device) + total_memory_bytes = props.total_memory + # max size is 5GB + target_size = min(int(total_memory_bytes * float(memory_ratio)), 5 * 1024**3) + return target_size + + +@lru_cache(maxsize=1) +def get_num_buffers(): + return int(os.getenv("NRL_REFIT_NUM_BUFFERS", "2")) + + +def packed_broadcast_producer(iterator, group, src, post_iter_func): + """Broadcast a list of tensors in a packed manner. + + Args: + iterator: iterator of model parameters. 
Returns a tuple of (name, tensor) + group: process group (vllm PyNcclCommunicator) + src: source rank (0 in current implementation) + post_iter_func: function to apply to each tensor before packing, should return a tensor + + Returns: + None + + """ + target_packed_tensor_size = get_target_packed_tensor_size() + + num_buffers = get_num_buffers() + streams = [torch.cuda.Stream() for _ in range(num_buffers)] + buffer_idx = 0 + + packing_tensor_list = [[] for _ in range(num_buffers)] + packing_tensor_sizes = [0 for _ in range(num_buffers)] + packed_tensors = [ + torch.empty(0, dtype=torch.uint8, device="cuda") for _ in range(num_buffers) + ] + + while True: + # Move to the next buffer + buffer_idx = (buffer_idx + 1) % num_buffers + # Synchronize the current stream + streams[buffer_idx].synchronize() + # Start tasks for the new buffer in a new stream + with torch.cuda.stream(streams[buffer_idx]): # type: ignore[arg-type] + try: + # Initialize the packing tensor list and sizes + packing_tensor_list[buffer_idx] = [] + packing_tensor_sizes[buffer_idx] = 0 + # Pack the tensors + while True: + # Apply backend specific post processing and then convert to linearized uint8 tensor + tensor = post_iter_func(next(iterator)).view(torch.uint8).view(-1) + packing_tensor_list[buffer_idx].append(tensor) + packing_tensor_sizes[buffer_idx] += tensor.view(torch.uint8).numel() + if packing_tensor_sizes[buffer_idx] > target_packed_tensor_size: + break + # Pack the tensors and call broadcast collective + packed_tensors[buffer_idx] = torch.cat( + packing_tensor_list[buffer_idx], dim=0 + ) + group.broadcast(packed_tensors[buffer_idx], src=src) + except StopIteration: + # do the last broadcast if there are remaining tensors + if len(packing_tensor_list[buffer_idx]) > 0: + packed_tensors[buffer_idx] = torch.cat( + packing_tensor_list[buffer_idx], dim=0 + ) + group.broadcast(packed_tensors[buffer_idx], src=src) + break + + +def packed_broadcast_consumer(iterator, group, src, post_unpack_func): + 
"""Consume a packed tensor and unpack it into a list of tensors. + + Args: + iterator: iterator of model parameters. Returns a tuple of (name, tensor) + group: process group (vllm PyNcclCommunicator) + src: source rank (0 in current implementation) + post_unpack_func: function to apply to each tensor after unpacking + + Returns: + None + + """ + + def unpack_tensor( + packed_tensor: torch.Tensor, meta_data_list: list[Any] + ) -> List[Tuple[str, torch.Tensor]]: + """Unpack a single tensor into a list of tensors. + + Args: + packed_tensor: the packed torch.uint8 tensor to unpack + meta_data_list: List[(name, shape, dtype, offset, tensor_size)] + + Returns: + unpacked List[(name, tensor)] + """ + unpacked_list = [] + # Perform batched split with torch.split_with_sizes + packed_tensor_sizes = list(map(lambda x: x[4], meta_data_list)) + unpacked_tensor = packed_tensor.split_with_sizes(packed_tensor_sizes) + + # unpacked_list = List[(name, torch.Tensor.view(dtype).view(*shape))] + unpacked_list = [ + ( + meta_data_list[i][0], + tensor.view(meta_data_list[i][2]).view(*meta_data_list[i][1]), + ) + for i, tensor in enumerate(unpacked_tensor) + ] + + return unpacked_list + + target_packed_tensor_size = get_target_packed_tensor_size() + + num_buffers = get_num_buffers() + streams = [torch.cuda.Stream() for _ in range(num_buffers)] + buffer_idx = 0 + + packing_tensor_meta_data = [[] for _ in range(num_buffers)] + packing_tensor_sizes = [0 for _ in range(num_buffers)] + offsets = [0 for _ in range(num_buffers)] + packed_tensors = [ + torch.empty(0, dtype=torch.uint8, device="cuda") for _ in range(num_buffers) + ] + + while True: + # Move to the next buffer + buffer_idx = (buffer_idx + 1) % num_buffers + # Synchronize the current stream + streams[buffer_idx].synchronize() + with torch.cuda.stream(streams[buffer_idx]): # type: ignore[arg-type] + # Initialize the packing tensor meta data + packing_tensor_meta_data[buffer_idx] = [] + packing_tensor_sizes[buffer_idx] = 0 + 
offsets[buffer_idx] = 0 + try: + # Form a packed tensor + while True: + name, (shape, dtype) = next(iterator) + tensor_size = math.prod(shape) * dtype.itemsize + packing_tensor_meta_data[buffer_idx].append( + (name, shape, dtype, offsets[buffer_idx], tensor_size) + ) + packing_tensor_sizes[buffer_idx] += tensor_size + offsets[buffer_idx] += tensor_size + if packing_tensor_sizes[buffer_idx] > target_packed_tensor_size: + break + # Create a packed tensor and broadcast it + packed_tensors[buffer_idx] = torch.empty( + packing_tensor_sizes[buffer_idx], dtype=torch.uint8, device="cuda" + ) + group.broadcast(packed_tensors[buffer_idx], src=src) + # Load the packed tensor into the model + post_unpack_func( + unpack_tensor( + packed_tensors[buffer_idx], packing_tensor_meta_data[buffer_idx] + ) + ) + except StopIteration: + # do the last broadcast if there are remaining tensors + if len(packing_tensor_meta_data[buffer_idx]) > 0: + # Create a packed tensor and broadcast it + packed_tensors[buffer_idx] = torch.empty( + packing_tensor_sizes[buffer_idx], + dtype=torch.uint8, + device="cuda", + ) + group.broadcast(packed_tensors[buffer_idx], src=src) + # Load the packed tensor into the model + post_unpack_func( + unpack_tensor( + packed_tensors[buffer_idx], + packing_tensor_meta_data[buffer_idx], + ) + ) + break diff --git a/nemo_rl/utils/timer.py b/nemo_rl/utils/timer.py index 4fdaffee98..5366d3f339 100644 --- a/nemo_rl/utils/timer.py +++ b/nemo_rl/utils/timer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import sys import time from contextlib import contextmanager from typing import Callable, Generator, Optional, Sequence, Union @@ -245,3 +246,76 @@ def reset(self, label: Optional[str] = None) -> None: else: self._timers = {} self._start_times = {} + + +def convert_to_seconds(time_string: str) -> int: + """Converts a time string in the format 'DD:HH:MM:SS' to total seconds. + + Args: + time_string (str): Time duration string, e.g., '00:03:45:00'. + + Returns: + int: Total time in seconds. + """ + days, hours, minutes, seconds = map(int, time_string.split(":")) + return days * 86400 + hours * 3600 + minutes * 60 + seconds + + +class TimeoutChecker: + def __init__( + self, timeout: Optional[str] = "00:03:45:00", fit_last_save_time: bool = False + ): + """Initializes the TimeoutChecker. + + Args: + timeout (str or None): Timeout in format 'DD:HH:MM:SS'. If None, timeout is considered infinite. + fit_last_save_time (bool): If True, considers average iteration time when checking timeout. + """ + super().__init__() + self.last_save_time = ( + float("inf") if timeout is None else convert_to_seconds(timeout) + ) + self.start_time = time.time() + self.last_saved = False + self.iteration_times = [] + self.previous_iteration_time: Optional[float] = None + self.fit_last_save_time = fit_last_save_time + + def check_save(self): + # Flush + sys.stdout.flush() + sys.stderr.flush() + + # Already saved after timeout + if self.last_saved: + return False + + current_time = time.time() + elapsed_time = current_time - self.start_time + + if self.fit_last_save_time and self.iteration_times: + average_iteration_time = sum(self.iteration_times) / len( + self.iteration_times + ) + if elapsed_time + average_iteration_time >= self.last_save_time: + self.last_saved = True + return True + + if elapsed_time >= self.last_save_time: + self.last_saved = True + return True + + return False + + def start_iterations(self): + self.previous_iteration_time = time.time() + + def mark_iteration(self): + 
sys.stdout.flush() + sys.stderr.flush() + + current_time = time.time() + if self.previous_iteration_time is not None: + elapsed_time = current_time - self.previous_iteration_time + self.previous_iteration_time = current_time + self.iteration_times.append(elapsed_time) diff --git a/pyproject.toml b/pyproject.toml index 64a7ca8062..70c882a4d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,142 +6,196 @@ build-backend = "setuptools.build_meta" packages = ["nemo_rl"] [tool.setuptools.dynamic] -version = {attr = "nemo_rl.__version__"} # any module attribute compatible with ast.literal_eval -readme = {file = "README.md", content-type = "text/markdown"} +version = { attr = "nemo_rl.__version__" } # any module attribute compatible with ast.literal_eval +readme = { file = "README.md", content-type = "text/markdown" } [project] name = "nemo-rl" -dynamic = [ - "version", - "readme", -] +dynamic = ["version", "readme"] description = "NeMo RL: A Scalable and Efficient Post-Training Library for Models Ranging from 1 GPU to 1000s, and from Tiny to >100B Parameters" requires-python = ">=3.12" -license = {text = "Apache 2.0"} +license = { text = "Apache 2.0" } dependencies = [ - "setuptools", - "ninja", # for flash-attn parallel build - "torch==2.7.0", - "triton", - "colored==2.2.3", - "ray[default]==2.46.0", - "transformers>=4.51.0", - "wandb", - "numpy", - "datasets>=4.0.0", - "rich", - "math-verify", - "accelerate>=0.26", - "tensorboard", - "omegaconf", - "torchdata", - "nvidia-ml-py", - "hydra-core", - "tiktoken", - "blobfile", - "debugpy", - "nvtx", - "matplotlib", - "plotly", - "mlflow", + "setuptools", + "ninja", # for flash-attn parallel build + "torch", # version dictated by vllm + "triton; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", + "colored==2.2.3", + "ray[default]==2.49.2", + "transformers>=4.55.4", + "wandb", + "numpy", + "datasets>=4.0.0", + "rich", + "math-verify", + "accelerate>=0.26", + "tensorboard", + 
"omegaconf", + "torchdata", + "nvidia-ml-py", + "hydra-core", + "tiktoken", + "blobfile", + "debugpy", + "nvtx", + "matplotlib", + "plotly", + "sympy>=1.14.0", + "pillow>=11.3.0", + "torchvision>=0.22.0", + "num2words>=0.5.14", # for SmolVLM + "mlflow>=3.5.0,<3.6.0", + "nvidia-nvshmem-cu12; sys_platform == 'linux' and (platform_machine == 'x86_64' or platform_machine == 'aarch64')", # for deep_ep build + "swanlab", + "pyzmq", ] [project.optional-dependencies] # Currently unused, but after https://github.com/NVIDIA-NeMo/RL/issues/501 is resolved, we should use this for the "BASE" PYEXECUTABLE automodel = [ - # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) - # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 - # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 - "flash-attn==2.7.4.post1", + "nemo-automodel", + # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) + # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 + # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 + "vllm==0.18.0", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/811 resolved + "flash-attn==2.8.3", + "mamba-ssm", + "causal-conv1d", ] vllm = [ - "vllm==0.9.0", - # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved - "flash-attn==2.7.4.post1", + "cuda-python", + "deep_gemm @ git+https://github.com/deepseek-ai/DeepGEMM.git@7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c", + # deep_ep also needs libibverbs-dev + # sudo apt-get update + # sudo apt-get install libibverbs-dev + "deep_ep @ git+https://github.com/deepseek-ai/DeepEP.git@e3908bf5bd0cc6265bcb225d15cd8c996d4759ef", + 
"vllm==0.18.0", + "num2words>=0.5.14", + # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved + "flash-attn==2.8.3", + # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved + "mamba-ssm", + # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved + "causal-conv1d", ] mcore = [ - # also need cudnn (https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network) - # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb - # sudo dpkg -i cuda-keyring_1.1-1_all.deb - # sudo apt-get update - # sudo apt-get install cudnn-cuda-12 - "transformer-engine[pytorch]==2.3.0", - "megatron-core", - "nemo-tron", - # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) - # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 - # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 - "flash-attn==2.7.4.post1", + # also need cudnn (https://developer.nvidia.com/cudnn-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=20.04&target_type=deb_network) + # wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb + # sudo dpkg -i cuda-keyring_1.1-1_all.deb + # sudo apt-get update + # sudo apt-get install cudnn-cuda-12 + + # This dependency also needs to be compatible with the spec in Megatron-Bridge/pyproject.toml. + # It is specified here since we don't directly use Megatron-Bridge/pyproject.toml, but a proxy setup.py+pyproject.toml combo + # outside to allow "optionally" installing the megatron path. 
It's simpler to deal with transformer-engine here in the NeMo RL pyproject.toml + "transformer-engine[pytorch]==2.13.0", + "megatron-core", + "megatron-bridge", + # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved + "vllm==0.18.0", + # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) + # https://github.com/NVIDIA/TransformerEngine/blob/v2.3/transformer_engine/pytorch/attention/dot_product_attention/utils.py#L108 + # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 + "flash-attn==2.8.3", ] +penguin = ["penguin"] [dependency-groups] # This is a default group so that we install these even with bare `uv sync` build = [ - # Build requirement for TE - "torch==2.7.0", - # Build requirement for TE - "setuptools", - "packaging", - "einops", - # Build requirement for nemo_run - "hatchling", - # Build requirement for mcore - "pybind11", - # Build requirement for flash-attn - "psutil", + # Build requirement for TE + "torch", # version dictated by vllm + # Build requirement for TE + "setuptools", + "packaging", + "einops", + # Build requirement for nemo_run + "hatchling", + # Build requirement for mcore + "pybind11", + # Build requirement for flash-attn + "psutil", ] docs = [ - "sphinx", - "sphinx-autobuild", # For live doc serving while editing docs - "sphinx-autodoc2", # For documenting Python API - "sphinx-copybutton", # Adds a copy button for code blocks - "myst_parser", # For our markdown docs - "nvidia-sphinx-theme", # Our NVIDIA theme + "sphinx", + "sphinx-autobuild", # For live doc serving while editing docs + "sphinx-autodoc2", # For documenting Python API + "sphinx-copybutton", # Adds a copy button for code blocks + "sphinx-design", # For design components in docs + "myst_parser", # For our markdown docs + "nvidia-sphinx-theme", # Our NVIDIA theme + "gitpython>=3.1.45", # To git-related information + "python-dotenv", # For 
environment variable management + "sphinxcontrib-mermaid", # For Mermaid diagram support + "swagger-plugin-for-sphinx", # For Swagger/OpenAPI documentation ] dev = [ - "pre-commit==3.6.0", - "ruff==0.9.9", - "types-PyYAML", - "types-requests", - "pyrefly==0.24.2", + "pre-commit>=4.2.0", + "ruff==0.9.9", + "types-PyYAML", + "types-requests", + "pyrefly==0.24.2", ] test = [ - "pytest>=7.0.0", - "pytest-timeout", - "pytest-cov", - "pytest-asyncio", + "pytest>=7.0.0", + "pytest-timeout", + "pytest-cov", + "pytest-asyncio", + "pytest-testmon", ] [tool.uv.sources] megatron-core = { workspace = true } -nemo-tron = { workspace = true } -# The NeMo Run source to be used by nemo-tron +nemo-automodel = { workspace = true } +megatron-bridge = { workspace = true } +penguin = { workspace = true } nemo_run = { git = "https://github.com/NVIDIA-NeMo/Run", rev = "414f0077c648fde2c71bb1186e97ccbf96d6844c" } # torch/torchvision/triton all come from the torch index in order to pick up aarch64 wheels torch = [ - { index = "pytorch-cu128" }, + { index = "pytorch-cu129", marker = "sys_platform != 'darwin'" }, + { index = "pypi", marker = "sys_platform == 'darwin'" }, ] torchvision = [ - { index = "pytorch-cu128" }, + { index = "pytorch-cu129", marker = "sys_platform != 'darwin'" }, + { index = "pypi", marker = "sys_platform == 'darwin'" }, ] triton = [ - { index = "pytorch-cu128" }, + { index = "pytorch-cu129", marker = "sys_platform != 'darwin'" }, + { index = "pypi", marker = "sys_platform == 'darwin'" }, ] +causal-conv1d = { git = "https://github.com/Dao-AILab/causal-conv1d", tag = "v1.5.0.post8" } +mamba-ssm = { git = "https://github.com/state-spaces/mamba.git", rev = "2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" } [tool.uv.workspace] members = [ - "3rdparty/Megatron-LM-workspace", - "3rdparty/NeMo-workspace", + "3rdparty/Megatron-LM-workspace", + "3rdparty/Automodel-workspace/Automodel", + "3rdparty/Megatron-Bridge-workspace", + "3rdparty/Penguin-workspace", ] [[tool.uv.index]] -name = 
"pytorch-cu128" -url = "https://download.pytorch.org/whl/cu128" +name = "pypi" +url = "https://pypi.org/simple" +explicit = true + +[[tool.uv.index]] +name = "pytorch-cu129" +url = "https://download.pytorch.org/whl/cu129" explicit = true [tool.uv] -no-build-isolation-package = ["transformer-engine-torch", "transformer-engine", "flash-attn"] +preview = true # Enable preview features like extra-build-dependencies +no-build-isolation-package = [ + "transformer-engine-torch", + "transformer-engine", + "flash-attn", + "mamba-ssm", + "causal-conv1d", + "deep_gemm", + "deep_ep", +] # Always apply the build group since dependencies like TE/mcore/nemo-run require build dependencies # and this lets us assume they are implicitly installed with a simply `uv sync`. Ideally, we'd # avoid including these in the default dependency set, but for now it's required. @@ -151,12 +205,57 @@ default-groups = ["dev", "build"] # --link-mode=copy (slower but more reliable; supresses warning) # --link-mode=symlink (fastest option when uv cache and venv on different file-system; caveat: venv is brittle since it depends on the environment/container) link-mode = "copy" +# The TE override is needed because automodel/mbridge we are on is still on 2.5.0 +# The opencv-python-headless override is needed because automodel pins it to 4.10.0.84, whereas vllm>=0.11.0 needs >= 4.11.0 +# TODO: REMOVE timm override before final merge - temporary hack to work around: +# nemo-automodel[all] requires timm==1.0.16, but megatron-bridge -> open-clip-torch requires timm>=1.0.17 +# Upstream fix needed in both repos to align timm versions +override-dependencies = [ + "transformer-engine[pytorch]==2.8.0", + "opencv-python-headless>=4.11.0", + "timm==1.0.16", # TEMP HACK: see comment above +] + +# Augment build dependencies for packages that need torch at build time +[tool.uv.extra-build-dependencies] +flash-attn = [{ requirement = "torch", match-runtime = true }] +# Git-sourced packages CAN use match-runtime = true 
if we provide dependency-metadata +deep_ep = [{ requirement = "torch", match-runtime = true }] +deep_gemm = [{ requirement = "torch", match-runtime = true }] +transformer-engine = [{ requirement = "torch", match-runtime = true }] +transformer-engine-torch = [{ requirement = "torch", match-runtime = true }] +mamba-ssm = [{ requirement = "torch", match-runtime = true }] +causal-conv1d = [{ requirement = "torch", match-runtime = true }] # Needed when building from source [[tool.uv.dependency-metadata]] name = "flash-attn" requires-dist = ["torch", "einops", "setuptools", "psutil", "ninja"] +[[tool.uv.dependency-metadata]] +name = "causal-conv1d" +# This version has to match the version in the commit/rev/tag used +version = "1.5.0.post8" +requires-dist = ["torch", "packaging", "ninja"] + +[[tool.uv.dependency-metadata]] +name = "mamba-ssm" +# This version has to match the version in the commit/rev/tag used +version = "2.2.4" +requires-dist = ["torch", "packaging", "ninja", "causal-conv1d"] + +[[tool.uv.dependency-metadata]] +name = "deep_ep" +# This version has to match the version in the commit/rev/tag used +version = "v1.1.0+e3908bf" +requires-dist = ["torch", "packaging", "ninja"] + +[[tool.uv.dependency-metadata]] +name = "deep_gemm" +# This version has to match the version in the commit/rev/tag used +version = "v2.0.0+7b6b556" +requires-dist = ["torch", "packaging", "ninja"] + [tool.black] line-length = 120 include = '\.pyi?$' @@ -173,8 +272,11 @@ addopts = "--durations=15 -s -rA -x" testpaths = ["tests"] python_files = "test_*.py" markers = [ - "mcore: marks tests that require the mcore extra", - "hf_gated: marks tests that require HuggingFace token access for gated models", + "run_first: marks tests that should run before others", + "mcore: marks tests that require the mcore extra", + "hf_gated: marks tests that require HuggingFace token access for gated models", + "automodel: marks tests that require the automodel extra", + "vllm: marks tests that require the 
vllm extra", ] [tool.pyrefly] @@ -203,11 +305,10 @@ ignore = ["D417", "D10", "F841"] convention = "google" # Section to exclude errors for different file types -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] # Ignore all directories named `tests`. "tests/**" = ["D"] # Ignore all files that end in `_test.py`. "*_test.py" = ["D"] # Ignore F401 (import but unused) in __init__.py "__init__.py" = ["F401"] - diff --git a/pyrefly.toml b/pyrefly.toml index f3bc05a639..a1d64ad6fa 100644 --- a/pyrefly.toml +++ b/pyrefly.toml @@ -1,112 +1,122 @@ python-version = "3.12.0" replace-imports-with-any = [ - "pynvml.*", - "hydra._internal.*", - "hydra.core.override_parser.*", - "datasets.*", - "transformers.*", - "vllm.*", - "math_verify.*", - "sympy.*", - "torchdata.*", - "nemo.*", - "megatron.*", - "ray.*", - "numpy.*", + "nemo_automodel.*", + "pynvml.*", + "hydra._internal.*", + "hydra.core.override_parser.*", + "datasets.*", + "transformers.*", + "vllm.*", + "math_verify.*", + "sympy.*", + "torchdata.*", + "nemo.*", + "megatron.*", + "ray.*", + "numpy.*", + "sphinx.*", + "docutils.*", ] project-includes = [ - # TODO: enable these once we have 100 correctness - #"nemo_rl/**/*.py", - #"examples/**/*.py", - #"docs/*.py", - #"tools/**/*.py", - - # Generate the list of errors per file - # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . 
as $file | "\($error_counts[$file] // 0)\t\($file)"' | sort -n + # TODO: enable these once we have 100 correctness + #"nemo_rl/**/*.py", + #"examples/**/*.py", + #"docs/*.py", + #"tools/**/*.py", - # Generate list of files with 0 errors - # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . as $file | if ($error_counts[$file] // 0) == 0 then $file else empty end' - # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . 
as $file | if ($error_counts[$file] // 0) == 0 then " \"\($file)\"," else empty end' - "docs/conf.py", - "docs/helpers.py", - "examples/converters/convert_dcp_to_hf.py", - "examples/converters/convert_megatron_to_hf.py", - "nemo_rl/algorithms/__init__.py", - "nemo_rl/algorithms/interfaces.py", - "nemo_rl/algorithms/utils.py", - "nemo_rl/converters/__init__.py", - "nemo_rl/converters/huggingface/__init__.py", - "nemo_rl/converters/huggingface/vllm_export.py", - "nemo_rl/converters/megatron/__init__.py", - "nemo_rl/converters/megatron/vllm_export.py", - "nemo_rl/data/__init__.py", - "nemo_rl/data/datasets.py", - "nemo_rl/data/eval_datasets/__init__.py", - "nemo_rl/data/eval_datasets/aime2024.py", - "nemo_rl/data/eval_datasets/gpqa.py", - "nemo_rl/data/eval_datasets/local_math_dataset.py", - "nemo_rl/data/eval_datasets/math.py", - "nemo_rl/data/eval_datasets/mmlu.py", - "nemo_rl/data/eval_datasets/mmlu_pro.py", - "nemo_rl/data/hf_datasets/__init__.py", - "nemo_rl/data/hf_datasets/chat_templates.py", - "nemo_rl/data/hf_datasets/deepscaler.py", - "nemo_rl/data/hf_datasets/dpo.py", - "nemo_rl/data/hf_datasets/helpsteer3.py", - "nemo_rl/data/hf_datasets/oai_format_dataset.py", - "nemo_rl/data/hf_datasets/oasst.py", - "nemo_rl/data/hf_datasets/openmathinstruct2.py", - "nemo_rl/data/hf_datasets/prompt_response_dataset.py", - "nemo_rl/data/hf_datasets/squad.py", - "nemo_rl/data/interfaces.py", - "nemo_rl/data/packing/__init__.py", - "nemo_rl/data/processors.py", - "nemo_rl/distributed/__init__.py", - "nemo_rl/distributed/collectives.py", - "nemo_rl/distributed/named_sharding.py", - "nemo_rl/distributed/ray_actor_environment_registry.py", - "nemo_rl/distributed/virtual_cluster.py", - "nemo_rl/distributed/worker_group_utils.py", - "nemo_rl/environments/__init__.py", - "nemo_rl/environments/games/sliding_puzzle.py", - "nemo_rl/environments/interfaces.py", - "nemo_rl/environments/math_environment.py", - "nemo_rl/environments/metrics.py", - "nemo_rl/environments/utils.py", - 
"nemo_rl/evals/__init__.py", - "nemo_rl/evals/answer_parsing.py", - "nemo_rl/experience/__init__.py", - "nemo_rl/experience/rollouts.py", - "nemo_rl/metrics/__init__.py", - "nemo_rl/metrics/metrics_utils.py", - "nemo_rl/models/__init__.py", - "nemo_rl/models/dtensor/__init__.py", - "nemo_rl/models/dtensor/parallelize.py", - "nemo_rl/models/generation/__init__.py", - "nemo_rl/models/generation/interfaces.py", - "nemo_rl/models/huggingface/__init__.py", - "nemo_rl/models/megatron/__init__.py", - "nemo_rl/models/megatron/community_import.py", - "nemo_rl/models/megatron/converters/__init__.py", - "nemo_rl/models/megatron/converters/common.py", - "nemo_rl/models/megatron/converters/deepseek.py", - "nemo_rl/models/megatron/converters/llama.py", - "nemo_rl/models/megatron/converters/qwen2.py", - "nemo_rl/models/megatron/converters/qwen3.py", - "nemo_rl/models/policy/__init__.py", - "nemo_rl/models/policy/interfaces.py", - "nemo_rl/models/policy/utils.py", - "nemo_rl/utils/__init__.py", - "nemo_rl/utils/checkpoint.py", - "nemo_rl/utils/config.py", - "nemo_rl/utils/native_checkpoint.py", - "nemo_rl/utils/nsys.py", - "nemo_rl/utils/nvml.py", - "nemo_rl/utils/prefetch_venvs.py", - "nemo_rl/utils/timer.py", - "nemo_rl/utils/venvs.py", - "tools/model_diagnostics/1.max_model_len_respected.py", - "tools/model_diagnostics/2.long_generation_decode_vs_prefill.py", + # Generate the list of errors per file + # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . 
as $file | "\($error_counts[$file] // 0)\t\($file)"' | sort -n + + # Generate list of files with 0 errors + # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . as $file | if ($error_counts[$file] // 0) == 0 then $file else empty end' + # uv run --group dev pyrefly check $(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py') --output-format json | jq -r --slurpfile all_files <(git ls-files 'nemo_rl/**/*.py' 'examples/**/*.py' 'docs/*.py' 'tools/**/*.py' | jq -R -s 'split("\n")[:-1]') --arg pwd "$(pwd)/" '(.errors | group_by(.path) | map({(.[0].path | sub($pwd; "")): length}) | add // {}) as $error_counts | $all_files[0][] | . 
as $file | if ($error_counts[$file] // 0) == 0 then " \"\($file)\"," else empty end' + "docs/conf.py", + "docs/helpers.py", + "examples/converters/convert_dcp_to_hf.py", + "examples/converters/convert_megatron_to_hf.py", + "nemo_rl/algorithms/__init__.py", + "nemo_rl/algorithms/interfaces.py", + "nemo_rl/algorithms/utils.py", + "nemo_rl/algorithms/reward_functions.py", + "nemo_rl/data/__init__.py", + "nemo_rl/data/chat_templates.py", + "nemo_rl/data/collate_fn.py", + "nemo_rl/data/datasets/__init__.py", + "nemo_rl/data/datasets/eval_datasets/__init__.py", + "nemo_rl/data/datasets/eval_datasets/aime.py", + "nemo_rl/data/datasets/eval_datasets/gpqa.py", + "nemo_rl/data/datasets/eval_datasets/local_math_dataset.py", + "nemo_rl/data/datasets/eval_datasets/math.py", + "nemo_rl/data/datasets/eval_datasets/mmlu.py", + "nemo_rl/data/datasets/eval_datasets/mmlu_pro.py", + "nemo_rl/data/datasets/preference_datasets/__init__.py", + "nemo_rl/data/datasets/preference_datasets/binary_preference_dataset.py", + "nemo_rl/data/datasets/preference_datasets/helpsteer3.py", + "nemo_rl/data/datasets/preference_datasets/preference_dataset.py", + "nemo_rl/data/datasets/preference_datasets/tulu3.py", + "nemo_rl/data/datasets/processed_dataset.py", + "nemo_rl/data/datasets/response_datasets/__init__.py", + "nemo_rl/data/datasets/response_datasets/clevr.py", + "nemo_rl/data/datasets/response_datasets/deepscaler.py", + "nemo_rl/data/datasets/response_datasets/geometry3k.py", + "nemo_rl/data/datasets/response_datasets/oai_format_dataset.py", + "nemo_rl/data/datasets/response_datasets/oasst.py", + "nemo_rl/data/datasets/response_datasets/openmathinstruct2.py", + "nemo_rl/data/datasets/response_datasets/refcoco.py", + "nemo_rl/data/datasets/response_datasets/response_dataset.py", + "nemo_rl/data/datasets/response_datasets/squad.py", + "nemo_rl/data/datasets/utils.py", + "nemo_rl/data/interfaces.py", + "nemo_rl/data/multimodal_utils.py", + "nemo_rl/data/packing/__init__.py", + 
"nemo_rl/data/processors.py", + "nemo_rl/distributed/__init__.py", + "nemo_rl/distributed/collectives.py", + "nemo_rl/distributed/named_sharding.py", + "nemo_rl/distributed/ray_actor_environment_registry.py", + "nemo_rl/distributed/virtual_cluster.py", + "nemo_rl/distributed/worker_group_utils.py", + "nemo_rl/environments/__init__.py", + "nemo_rl/environments/games/sliding_puzzle.py", + "nemo_rl/environments/interfaces.py", + "nemo_rl/environments/math_environment.py", + "nemo_rl/environments/metrics.py", + "nemo_rl/environments/rewards.py", + "nemo_rl/environments/utils.py", + "nemo_rl/environments/vlm_environment.py", + "nemo_rl/evals/__init__.py", + "nemo_rl/evals/answer_parsing.py", + "nemo_rl/experience/__init__.py", + "nemo_rl/experience/rollouts.py", + "nemo_rl/models/__init__.py", + "nemo_rl/models/dtensor/__init__.py", + "nemo_rl/models/dtensor/parallelize.py", + "nemo_rl/models/generation/__init__.py", + "nemo_rl/models/generation/interfaces.py", + "nemo_rl/models/generation/vllm/__init__.py", + "nemo_rl/models/generation/vllm/config.py", + "nemo_rl/models/generation/vllm/utils.py", + "nemo_rl/models/generation/vllm/vllm_backend.py", + "nemo_rl/models/huggingface/__init__.py", + "nemo_rl/models/megatron/__init__.py", + "nemo_rl/models/megatron/community_import.py", + "nemo_rl/models/policy/__init__.py", + "nemo_rl/models/policy/interfaces.py", + "nemo_rl/models/policy/utils.py", + "nemo_rl/utils/__init__.py", + "nemo_rl/utils/checkpoint.py", + "nemo_rl/utils/config.py", + "nemo_rl/utils/native_checkpoint.py", + "nemo_rl/utils/automodel_checkpoint.py", + "nemo_rl/utils/nsys.py", + "nemo_rl/utils/nvml.py", + "nemo_rl/utils/packed_tensor.py", + "nemo_rl/utils/prefetch_venvs.py", + "nemo_rl/utils/timer.py", + "nemo_rl/utils/venvs.py", + "tools/model_diagnostics/1.max_model_len_respected.py", + "tools/model_diagnostics/2.long_generation_decode_vs_prefill.py", + "tools/model_diagnostics/4.vllm_precision_compilation_test.py", ] # Disable TypedDict mutation 
errors since TypedDict objects are regular dicts at runtime diff --git a/ray.sub b/ray.sub index 4635f66be6..9b4feb110c 100644 --- a/ray.sub +++ b/ray.sub @@ -31,8 +31,8 @@ maybe_gres_arg() { # Check if any nodes in the partition have GRES configured # Assumes a homogeneous allocation (not a heterogeneous job) if sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep -q "gpu:"; then - # Do a quick assert here that gpus:8 == gpus:$GPUS_PER_NODE. It is probably a user error if someone isn't using GPUS_PER_NODE=8 on our clusters if it supports --gres=gpu:8. - if [[ $GPUS_PER_NODE -ne $(sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep "gpu:" | cut -d: -f2) ]]; then + # Do a quick assert here that gpus:8 == gpus:$GPUS_PER_NODE. It is probably a user error if someone isn't using GPUS_PER_NODE=8 on our clusters if it supports --gres=gpu:8 or gpu:a100:8 + if [[ $GPUS_PER_NODE -ne $(sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep "gpu:" | awk -F: '{print $NF}') ]]; then echo "Error: GPUS_PER_NODE=$GPUS_PER_NODE but GRES detected is $(sinfo -p $SLURM_JOB_PARTITION -h -o "%G" | grep "gpu:") meaning GPUS_PER_NODE is not set to fully claim the GPUs on the nodes." >&2 exit 1 fi @@ -59,18 +59,36 @@ DASHBOARD_AGENT_GRPC_PORT=${DASHBOARD_AGENT_GRPC_PORT:-53007} METRICS_EXPORT_PORT=${METRICS_EXPORT_PORT:-53009} # Ports for the head node -PORT=${PORT:-54258} +PORT=${PORT:-54514} RAY_CLIENT_SERVER_PORT=${RAY_CLIENT_SERVER_PORT:-10001} #REDIT_SHARD_PORTS=${REDIT_SHARD_PORTS:-"random"} ?? -DASHBOARD_GRPC_PORT=${DASHBOARD_GRPC_PORT:-52367} DASHBOARD_PORT=${DASHBOARD_PORT:-8265} # Also used by debugger DASHBOARD_AGENT_LISTEN_PORT=${DASHBOARD_AGENT_LISTEN_PORT:-52365} +RAY_DEBUGGER_ARGS= +if [ "${RAY_DEBUG:-}" = "legacy" ]; then + RAY_DEBUGGER_ARGS="--ray-debugger-external" +fi + +# After ray>=2.47, this feature is enabled by default which creates uv venvs for any py_executable starting with `uv run`. 
+# There is severe contention and performance issues with this enabled considering our dependencies are so large and occasionally +# need to be compiled, so NeMo RL has an implementation in nemo_rl/utils/venv.py that does it once per node as opposed to once per task. +export RAY_ENABLE_UV_RUN_RUNTIME_ENV=0 + +# Setting ulimit is recommended by ray best practices page +# @ https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html +# It's session based and won't affect the system outside the script +# Ensure that the soft limit isn't above the hard limit +if [[ $(ulimit -Hn) == "unlimited" ]] || [[ 65535 -lt $(ulimit -Hn) ]]; then + ulimit -Sn 65535 +elif [[ $(ulimit -Hn) != "unlimited" ]] && [[ $(ulimit -Hn) -lt 65535 ]]; then + echo "[WARNING]: Cannot increase ulimit on file descriptors to 65535 according ray recommendation: https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html. Speak to cluster admins to increase, otherwise ray may crash unexpectedly." +fi # On our clusters, the largest port range on an idle worker appeared between 52369-64607 # (not including the other ports set by this script). So this range is chosen to be # somewhere in the middle MIN_WORKER_PORT=${MIN_WORKER_PORT:-54001} -MAX_WORKER_PORT=${MAX_WORKER_PORT:-54257} +MAX_WORKER_PORT=${MAX_WORKER_PORT:-54513} ######################################################## # Number seconds to sync logs from /tmp/ray/session_*/logs to $LOG_DIR/ray/ RAY_LOG_SYNC_FREQUENCY=${RAY_LOG_SYNC_FREQUENCY:-} @@ -118,13 +136,66 @@ CPUS_PER_WORKER=${CPUS_PER_WORKER:-$((GPUS_PER_NODE * 16))} num_retries=3 +# Track backgrounded srun client PIDs for head and workers +declare -A SRUN_PIDS + +# Verify all backgrounded srun client processes are still alive; exit fast if any died +check_srun_processes() { + for name in "${!SRUN_PIDS[@]}"; do + pid="${SRUN_PIDS[$name]}" + # Check if the process is still running + if ! 
kill -0 "$pid" 2>/dev/null; then + echo "[ERROR] Background srun '$name' died (pid=$pid). Could be a failure in startup or an issue with the node preventing the srun to start. Attempting to exit." >&2 + # Signal sidecars inside containers to terminate ASAP + touch "$LOG_DIR/ENDED" + exit 1 + fi + done +} + # Getting the node names and IP addresses in the SLURM allocation nodes=$(scontrol show hostnames "$SLURM_JOB_NODELIST") nodes_array=($nodes) ip_addresses_array=() for node in $nodes; do - ip_address=$(host $node | awk '/has address/ { print $4 }') + # Try multiple methods to get IP address - ENHANCED VERSION v2.0 + echo "[DEBUG] Resolving hostname: $node using enhanced resolution methods" + ip_address="" + + # Method 1: Try host command + echo "[DEBUG] Method 1: host command" + ip_address=$(host $node 2>/dev/null | awk '/has address/ { print $4 }' | head -1 || true) + echo "[DEBUG] host result: '$ip_address'" + + # Method 2: If host fails, try getent + if [[ -z "$ip_address" ]]; then + echo "[DEBUG] Method 2: getent hosts" + ip_address=$(getent hosts $node 2>/dev/null | awk '{ print $1 }' | head -1 || true) + echo "[DEBUG] getent result: '$ip_address'" + fi + + # Method 3: If getent fails, try nslookup + if [[ -z "$ip_address" ]]; then + echo "[DEBUG] Method 3: nslookup" + ip_address=$(nslookup $node 2>/dev/null | awk '/^Address: / { print $2 }' | head -1 || true) + echo "[DEBUG] nslookup result: '$ip_address'" + fi + + # Method 4: If all DNS methods fail, try ping to extract IP + if [[ -z "$ip_address" ]]; then + echo "[DEBUG] Method 4: ping" + ip_address=$(ping -c 1 $node 2>/dev/null | grep "PING" | sed 's/.*(\([^)]*\)).*/\1/' || true) + echo "[DEBUG] ping result: '$ip_address'" + fi + + # If still no IP, use the hostname itself (might work if it's already an IP or resolvable) + if [[ -z "$ip_address" ]]; then + echo "[WARNING] Could not resolve IP for $node, using hostname as fallback" + ip_address=$node + fi + + echo "[INFO] Node: $node -> IP: $ip_address" # 
Add the IP address to the array ip_addresses_array+=("$ip_address") done @@ -206,7 +277,6 @@ ray start --head \ --node-ip-address="$head_node_ip" \ --port=${PORT} \ --ray-client-server-port=${RAY_CLIENT_SERVER_PORT} \ - --dashboard-grpc-port=${DASHBOARD_GRPC_PORT} \ --dashboard-port=${DASHBOARD_PORT} \ \ --node-manager-port=$((${NODE_MANAGER_PORT} + 1)) \ @@ -215,8 +285,9 @@ ray start --head \ --dashboard-agent-grpc-port=$((${DASHBOARD_AGENT_GRPC_PORT} + 1)) \ --dashboard-agent-listen-port=$((${DASHBOARD_AGENT_LISTEN_PORT} + 1)) \ --metrics-export-port=$((${METRICS_EXPORT_PORT} + 1)) \ + $RAY_DEBUGGER_ARGS \ \ - --block + --block EOFINNER chmod +x /launch-head.sh @@ -232,6 +303,7 @@ exit 1 EOF ) srun $COMMON_SRUN_ARGS --container-name=ray-head --nodes=1 --ntasks=1 --cpus-per-task=$CPUS_PER_WORKER -w "$head_node" -o $LOG_DIR/ray-head.log bash -x -c "$head_cmd" & +SRUN_PIDS["ray-head"]=$! NUM_ACTORS=$((GPUS_PER_NODE * SLURM_JOB_NUM_NODES)) @@ -266,6 +338,37 @@ monitor-sidecar() { } monitor-sidecar & +# Background process to sync ray logs every $RAY_LOG_SYNC_FREQUENCY seconds +log-sync-sidecar() { + set +x + if [[ -z "$RAY_LOG_SYNC_FREQUENCY" ]]; then + echo "RAY_LOG_SYNC_FREQUENCY is not set, skipping log sync sidecar" + return + fi + mkdir -p $LOG_DIR/ray/$node_i + while true; do + sleep $RAY_LOG_SYNC_FREQUENCY + if ls /tmp/ray/session_[0-9]* > /dev/null 2>&1; then + for session_dir in /tmp/ray/session_[0-9]*/; do + if [[ -d "\$session_dir/logs" ]]; then + session_name=\$(basename "\$session_dir") + mkdir -p "$LOG_DIR/ray/$node_i/\$session_name" + if command -v rsync > /dev/null 2>&1; then + rsync -ahP "\$session_dir/logs/" $LOG_DIR/ray/$node_i/\$session_name/logs/ 2>/dev/null || true + else + cp -r "\$session_dir/logs" $LOG_DIR/ray/$node_i/\$session_name/ + fi + fi + done + fi + if [[ -f "$LOG_DIR/ENDED" ]]; then + echo "Log sync sidecar terminating..." 
+ break + fi + done +} +log-sync-sidecar & + # Patch nsight.py before starting Ray worker sed -i 's/context\.py_executable = " "\.join(self\.nsight_cmd) + " python"/context.py_executable = " ".join(self.nsight_cmd) + f" {context.py_executable}"/g' /opt/nemo_rl_venv/lib64/python*/site-packages/ray/_private/runtime_env/nsight.py @@ -282,8 +385,9 @@ ray start --address "$ip_head" \ --dashboard-agent-grpc-port=${DASHBOARD_AGENT_GRPC_PORT} \ --dashboard-agent-listen-port=${DASHBOARD_AGENT_LISTEN_PORT} \ --metrics-export-port=${METRICS_EXPORT_PORT} \ + $RAY_DEBUGGER_ARGS \ \ - --block + --block EOFINNER count=0 @@ -298,11 +402,12 @@ exit 1 EOF ) srun $COMMON_SRUN_ARGS --container-name=ray-worker-$i --exact --nodes=1 --ntasks=1 --cpus-per-task=$CPUS_PER_WORKER -w "$node_i" -o $LOG_DIR/ray-worker-$i.log bash -x -c "$worker_cmd" & + SRUN_PIDS["ray-worker-$i"]=$! sleep 3 done # Then we wait here for the file to be created by the head node container -while ! srun --overlap --nodes=1 --ntasks=1 -w $head_node test -f $LOG_DIR/STARTED_RAY_HEAD; do +while check_srun_processes && ! srun --overlap --nodes=1 --ntasks=1 -w $head_node test -f $LOG_DIR/STARTED_RAY_HEAD; do echo "[INFO][$(date)] Waiting for head node container to start..." sleep 2 done @@ -327,9 +432,10 @@ extract_worker_units() { while true; do worker_units=$(extract_worker_units) echo "[INFO] Number of actors online: $worker_units/$NUM_ACTORS" - if [ "$worker_units" -eq "$NUM_ACTORS" ]; then + if [[ "$worker_units" -eq "$NUM_ACTORS" ]]; then break fi + check_srun_processes sleep 2 done @@ -338,7 +444,7 @@ echo "All workers connected!" 
# We can now launch a job on this cluster # We do so by launching a driver process on the physical node that the head node is on # This driver process is responsible for launching a job on the Ray cluster -CONTAINER_CWD=$(scontrol show job $SLURM_JOB_ID --json | jq -r '.jobs[].current_working_directory') +CONTAINER_CWD=$(scontrol show job $SLURM_JOB_ID | grep -oP 'WorkDir=\K[^ ]+' | head -1) if [[ -n "$COMMAND" ]]; then srun --no-container-mount-home --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" -o $LOG_DIR/ray-driver.log bash -c "$COMMAND" else @@ -346,10 +452,15 @@ else cat <<EOF >$SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh # No args launches on the head node (node 0) # Args 1-N launch on worker nodes (nodes 1 through N-1) +# Optional: set COMMAND='...' to run non-interactively instead of opening an interactive shell WORKER_NUM=\${1:-} if [[ -z "\$WORKER_NUM" ]]; then # Empty means we are on the head node - srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" --jobid $SLURM_JOB_ID --pty bash + if [[ -n "\${COMMAND:-}" ]]; then + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" --jobid $SLURM_JOB_ID bash -c "\$COMMAND" + else + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-head --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "$head_node" --jobid $SLURM_JOB_ID --pty bash + fi else # Worker numbers 1 through N-1 correspond to ray-worker-1 through ray-worker-(N-1) # and use nodes_array[1] through nodes_array[N-1] @@ -358,10 +469,15 @@ else exit 1 fi nodes_array=($nodes) - srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p 
$SLURM_JOB_PARTITION --overlap --container-name=ray-worker-\$WORKER_NUM --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "\${nodes_array[\$WORKER_NUM]}" --jobid $SLURM_JOB_ID --pty bash + if [[ -n "\${COMMAND:-}" ]]; then + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-worker-\$WORKER_NUM --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "\${nodes_array[\$WORKER_NUM]}" --jobid $SLURM_JOB_ID bash -c "\$COMMAND" + else + srun --no-container-mount-home $GRES_ARG -A $SLURM_JOB_ACCOUNT -p $SLURM_JOB_PARTITION --overlap --container-name=ray-worker-\$WORKER_NUM --container-workdir=$CONTAINER_CWD --nodes=1 --ntasks=1 -w "\${nodes_array[\$WORKER_NUM]}" --jobid $SLURM_JOB_ID --pty bash + fi fi EOF chmod +x $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh + echo " COMMAND='echo hello' bash $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh # run a non-interactive command on head node" echo " bash $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh # to attach to head node (i.e., 'worker 0')" echo " bash $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh 1 # to attach to worker 1" echo " bash $SLURM_SUBMIT_DIR/${SLURM_JOB_ID}-attach.sh 2 # to attach to worker 2, etc." diff --git a/tests/check_metrics.py b/tests/check_metrics.py index a48c2f4875..f0b3a9025b 100644 --- a/tests/check_metrics.py +++ b/tests/check_metrics.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import argparse +import builtins import json import statistics import sys @@ -23,17 +24,77 @@ # Custom functions for working with dictionary values def min(value): """Return the minimum value in a dictionary.""" - return __builtins__.min(float(v) for v in value.values()) + return builtins.min(float(v) for v in value.values()) def max(value): """Return the maximum value in a dictionary.""" - return __builtins__.max(float(v) for v in value.values()) + return builtins.max(float(v) for v in value.values()) -def mean(value): - """Return the mean of values in a dictionary.""" - return statistics.mean(float(v) for v in value.values()) +def ratio_above(value, threshold): + """Return the ratio of values that are >= threshold. + + Args: + value: Dictionary of step -> value + threshold: Threshold value to compare against + + Returns: + Float between 0.0 and 1.0 representing the proportion of values >= threshold + """ + vals = [float(v) for v in value.values()] + if len(vals) == 0: + return 0.0 + count_above = sum(1 for v in vals if v >= threshold) + return count_above / len(vals) + + +def mean(value, range_start=1, range_end=0, ignore_top_p=0.0): + """Return the mean of values (or a range of values) in a dictionary. + + Note: + step, and ranges, are 1 indexed. Range_end is exclusive. 
+ range_end=0 means to include until the last step in the run + + Args: + value: Dictionary of step -> value + range_start: Starting step (1-indexed, default=1) + range_end: Ending step (1-indexed, exclusive, 0 means last step) + ignore_top_p: Proportion of top outliers to ignore (0.0-1.0, default=0.0) + E.g., 0.05 ignores the top 5% of values + """ + + ## find potential offset that might arise from resuming from a checkpoint + max_step_reached = builtins.max([int(s) for s in value.keys()]) + ## this is the number of steps that occurred prior to resuming + offset = max_step_reached - len(value) + + num_elem = len(value) + if range_start < 0: + range_start += num_elem + 1 + offset + if range_end <= 0: + range_end += num_elem + 1 + offset + + vals = [] + for step, v in value.items(): + if range_start <= int(step) and int(step) < range_end: + vals.append(float(v)) + + # Validate ignore_top_p parameter + if not 0.0 <= ignore_top_p <= 1.0: + raise ValueError( + f"ignore_top_p must be between 0.0 and 1.0, got {ignore_top_p}" + ) + + # Filter out top outliers if requested + if ignore_top_p > 0.0 and len(vals) > 0: + # Sort values and determine cutoff index + sorted_vals = sorted(vals) + cutoff_idx = int(len(sorted_vals) * (1.0 - ignore_top_p)) + # Take only values up to the cutoff (excluding top p%) + vals = sorted_vals[:cutoff_idx] if cutoff_idx > 0 else sorted_vals[:1] + + return statistics.mean(vals) def evaluate_check(data: dict, check: str) -> tuple[bool, str, object]: @@ -43,17 +104,23 @@ def evaluate_check(data: dict, check: str) -> tuple[bool, str, object]: Tuple of (passed, message, value) """ # Create a local context with our custom functions and the data - local_context = {"data": data, "min": min, "max": max, "mean": mean} + local_context = { + "data": data, + "min": min, + "max": max, + "mean": mean, + "ratio_above": ratio_above, + } # Extract the value expression from the check value_expr = check.split(">")[0].split("<")[0].split("==")[0].strip() try: # Try 
to get the value first - value = eval(value_expr, {"__builtins__": __builtins__}, local_context) + value = eval(value_expr, {"__builtins__": builtins}, local_context) # Then evaluate the check - result = eval(check, {"__builtins__": __builtins__}, local_context) + result = eval(check, {"__builtins__": builtins}, local_context) if result: return True, f"PASS: {check}", value else: @@ -85,6 +152,8 @@ def main(): # Use helper functions python check_metrics.py results.json "min(data['class_f1']) > 0.6" python check_metrics.py results.json "mean(data['accuracies']) > 0.85" + python check_metrics.py results.json "mean(data['loss'], ignore_top_p=0.05) < 1.5" + python check_metrics.py results.json "ratio_above(data['error'], 1.05) < 0.02" """ parser.formatter_class = argparse.RawDescriptionHelpFormatter args = parser.parse_args() diff --git a/tests/docs/Docs_Tests.sh b/tests/docs/Docs_Tests.sh new file mode 100644 index 0000000000..8aced480b3 --- /dev/null +++ b/tests/docs/Docs_Tests.sh @@ -0,0 +1,23 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +set -xeuo pipefail # Exit immediately if a command exits with a non-zero status + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..) 
+ +cd ${PROJECT_ROOT}/docs +uv run --no-sync coverage run -a --data-file=${PROJECT_ROOT}/tests/.coverage --source=${PROJECT_ROOT}/nemo_rl -m sphinx.cmd.build -b doctest . _build/doctest +ls ${PROJECT_ROOT}/tests diff --git a/tests/functional/L1_Functional_Tests_GPU.sh b/tests/functional/L1_Functional_Tests_GPU.sh new file mode 100644 index 0000000000..9de07d28bd --- /dev/null +++ b/tests/functional/L1_Functional_Tests_GPU.sh @@ -0,0 +1,39 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +set -xeuo pipefail # Exit immediately if a command exits with a non-zero status + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..) 
+ +cd ${PROJECT_ROOT} +time uv run --no-sync bash ./tests/functional/sft.sh +time uv run --no-sync bash ./tests/functional/grpo.sh +time uv run --no-sync bash ./tests/functional/grpo_async.sh +time uv run --no-sync bash ./tests/functional/grpo_megatron.sh +time uv run --no-sync bash ./tests/functional/grpo_multiturn.sh +time uv run --no-sync bash ./tests/functional/grpo_non_colocated.sh +time uv run --no-sync bash ./tests/functional/dpo.sh +time uv run --no-sync bash ./tests/functional/rm.sh +time uv run --no-sync bash ./tests/functional/eval.sh +time uv run --no-sync bash ./tests/functional/eval_async.sh +time uv run --no-sync bash ./tests/functional/test_mcore_extra_installed_correctly.sh +time uv run --no-sync bash ./tests/functional/test_automodel_extra_installed_correctly.sh +time uv run --no-sync bash ./tests/functional/vlm_grpo.sh +time uv run --no-sync bash ./tests/functional/distillation.sh +time uv run --no-sync bash ./tests/functional/distillation_megatron.sh + +cd ${PROJECT_ROOT}/tests +coverage combine .coverage* diff --git a/tests/functional/distillation.sh b/tests/functional/distillation.sh new file mode 100644 index 0000000000..19cb71252c --- /dev/null +++ b/tests/functional/distillation.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# clean up checkpoint directory on exit +trap "rm -rf /tmp/distillation_checkpoints" EXIT + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
+# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -euo pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_distillation_math.py \ + policy.model_name=Qwen/Qwen3-0.6B \ + teacher.model_name=Qwen/Qwen3-0.6B \ + cluster.gpus_per_node=2 \ + policy.train_global_batch_size=16 \ + policy.dtensor_cfg.tensor_parallel_size=1 \ + policy.dtensor_cfg.context_parallel_size=2 \ + policy.max_total_sequence_length=2048 \ + teacher.dtensor_cfg.tensor_parallel_size=2 \ + teacher.dtensor_cfg.context_parallel_size=1 \ + distillation.max_num_steps=3 \ + distillation.num_prompts_per_step=16 \ + distillation.max_val_samples=16 \ + distillation.val_batch_size=8 \ + distillation.val_period=3 \ + data.dataset_name=OpenMathInstruct-2 \ + loss_fn.zero_outside_topk=true \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=true \ + checkpointing.save_period=3 \ + checkpointing.checkpoint_dir=/tmp/distillation_checkpoints \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["3"] < 1.0' diff --git a/tests/functional/distillation_megatron.sh b/tests/functional/distillation_megatron.sh new file mode 100644 index 0000000000..b56ea672fb --- /dev/null +++ b/tests/functional/distillation_megatron.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +# clean up checkpoint directory on exit +trap "rm -rf /tmp/distillation_checkpoints" EXIT + +SCRIPT_DIR=$( cd -- "$( 
dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) +# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -euo pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_distillation_math.py \ + --config $PROJECT_ROOT/examples/configs/distillation_math_megatron.yaml \ + policy.model_name=Qwen/Qwen3-0.6B-Base \ + teacher.model_name=Qwen/Qwen3-0.6B \ + cluster.gpus_per_node=2 \ + policy.train_global_batch_size=16 \ + policy.megatron_cfg.tensor_model_parallel_size=1 \ + policy.megatron_cfg.pipeline_model_parallel_size=1 \ + policy.megatron_cfg.context_parallel_size=2 \ + policy.max_total_sequence_length=2048 \ + teacher.megatron_cfg.tensor_model_parallel_size=2 \ + teacher.megatron_cfg.pipeline_model_parallel_size=1 \ + teacher.megatron_cfg.context_parallel_size=1 \ + distillation.max_num_steps=3 \ + distillation.num_prompts_per_step=16 \ + distillation.max_val_samples=16 \ + distillation.val_batch_size=8 \ + distillation.val_period=3 \ + data.dataset_name=OpenMathInstruct-2 \ + loss_fn.zero_outside_topk=false \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=true \ + checkpointing.save_period=3 \ + checkpointing.checkpoint_dir=/tmp/distillation_checkpoints \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["3"] < 1.0' diff --git a/tests/functional/dpo.sh 
b/tests/functional/dpo.sh
index b03b611b25..5de36dfbd4 100755
--- a/tests/functional/dpo.sh
+++ b/tests/functional/dpo.sh
@@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR
 mkdir -p $EXP_DIR $LOG_DIR
 
 cd $PROJECT_ROOT
-uv run $PROJECT_ROOT/examples/run_dpo.py \
+uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \
+    $PROJECT_ROOT/examples/run_dpo.py \
     policy.model_name=Qwen/Qwen3-0.6B \
     cluster.gpus_per_node=2 \
     dpo.max_num_steps=3 \
diff --git a/tests/functional/dpo_megatron.sh b/tests/functional/dpo_megatron.sh
new file mode 100755
index 0000000000..8c1524c2c5
--- /dev/null
+++ b/tests/functional/dpo_megatron.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# clean up checkpoint directory on exit
+trap "rm -rf /tmp/dpo_checkpoints" EXIT
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
+# Mark the current repo as safe, since wandb fetches metadata about the repo
+git config --global --add safe.directory $PROJECT_ROOT
+
+set -eou pipefail
+
+EXP_NAME=$(basename $0 .sh)
+EXP_DIR=$SCRIPT_DIR/$EXP_NAME
+LOG_DIR=$EXP_DIR/logs
+JSON_METRICS=$EXP_DIR/metrics.json
+RUN_LOG=$EXP_DIR/run.log
+export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
+
+rm -rf $EXP_DIR $LOG_DIR
+mkdir -p $EXP_DIR $LOG_DIR
+
+cd $PROJECT_ROOT
+uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \
+    $PROJECT_ROOT/examples/run_dpo.py \
+    --config $PROJECT_ROOT/examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.yaml \
+    policy.model_name=Qwen/Qwen3-0.6B \
+    cluster.gpus_per_node=2 \
+    dpo.max_num_steps=3 \
+    dpo.val_batches=1 \
+    dpo.val_period=3 \
+    logger.tensorboard_enabled=true \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=false \
+    logger.monitor_gpus=true \
+    checkpointing.enabled=false \
+    policy.megatron_cfg.tensor_model_parallel_size=1 \
+    policy.train_global_batch_size=8 \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+uv run tests/check_metrics.py $JSON_METRICS 
\ + 'data["train/loss"]["3"] < 5' \ + diff --git a/tests/functional/eval.sh b/tests/functional/eval.sh index 68fbd96d54..2a153ef153 100644 --- a/tests/functional/eval.sh +++ b/tests/functional/eval.sh @@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_eval.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_eval.py \ cluster.gpus_per_node=2 \ $@ \ 2>&1 | tee $RUN_LOG @@ -26,4 +27,4 @@ uv run $PROJECT_ROOT/examples/run_eval.py \ cat $RUN_LOG | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'data["score"] == 0.1' \ + 'data["score"] == 0.1' diff --git a/tests/functional/eval_async.sh b/tests/functional/eval_async.sh index 55a89ef012..c8c2a40433 100644 --- a/tests/functional/eval_async.sh +++ b/tests/functional/eval_async.sh @@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_eval.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_eval.py \ cluster.gpus_per_node=2 \ generation.vllm_cfg.async_engine=True \ generation.vllm_cfg.pipeline_parallel_size=2 \ @@ -28,4 +29,4 @@ uv run $PROJECT_ROOT/examples/run_eval.py \ cat $RUN_LOG | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'data["score"] == 0.1' \ + 'data["score"] == 0.1' diff --git a/tests/functional/grpo.sh b/tests/functional/grpo.sh index 7563f8707d..d581bb9dac 100755 --- a/tests/functional/grpo.sh +++ b/tests/functional/grpo.sh @@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_grpo_math.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + 
$PROJECT_ROOT/examples/run_grpo_math.py \ policy.model_name=Qwen/Qwen3-0.6B \ grpo.num_prompts_per_step=2 \ grpo.num_generations_per_prompt=4 \ @@ -37,5 +38,5 @@ uv run $PROJECT_ROOT/examples/run_grpo_math.py \ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'max(data["train/token_mult_prob_error"]) < 1.05' \ + 'max(data["train/token_mult_prob_error"]) < 1.05' diff --git a/tests/functional/grpo_async.sh b/tests/functional/grpo_async.sh new file mode 100644 index 0000000000..1e14266e97 --- /dev/null +++ b/tests/functional/grpo_async.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) +# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_grpo_math.py \ + policy.model_name=Qwen/Qwen3-0.6B \ + grpo.num_prompts_per_step=2 \ + grpo.num_generations_per_prompt=4 \ + policy.train_global_batch_size=4 \ + policy.train_micro_batch_size=1 \ + cluster.gpus_per_node=2 \ + grpo.max_num_steps=20 \ + grpo.async_grpo.enabled=true \ + grpo.async_grpo.max_trajectory_age_steps=1 \ + policy.generation.vllm_cfg.async_engine=true \ + loss_fn.use_importance_sampling_correction=true \ + policy.generation.colocated.enabled=false \ + policy.generation.colocated.resources.num_nodes=1 \ + policy.generation.colocated.resources.gpus_per_node=1 \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + 
logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=false \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'max(data["train/token_mult_prob_error"]) < 1.05' + diff --git a/tests/functional/grpo_megatron.sh b/tests/functional/grpo_megatron.sh index 15fb2efec3..f4b4ede8c4 100755 --- a/tests/functional/grpo_megatron.sh +++ b/tests/functional/grpo_megatron.sh @@ -19,7 +19,8 @@ mkdir -p $EXP_DIR $LOG_DIR # Using Qwen2.5-0.5B instead of Qwen3-0.6B because the latter is not supported by Megatron yet cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_grpo_math.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_grpo_math.py \ --config $PROJECT_ROOT/examples/configs/grpo_math_1B_megatron.yaml \ policy.model_name=Qwen/Qwen2.5-0.5B \ grpo.num_prompts_per_step=2 \ @@ -40,5 +41,5 @@ uv run $PROJECT_ROOT/examples/run_grpo_math.py \ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'max(data["train/token_mult_prob_error"]) < 1.05' \ + 'max(data["train/token_mult_prob_error"]) < 1.05' diff --git a/tests/functional/grpo_multiturn.sh b/tests/functional/grpo_multiturn.sh index 20e1472051..9af8a8dc97 100755 --- a/tests/functional/grpo_multiturn.sh +++ b/tests/functional/grpo_multiturn.sh @@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ policy.model_name=Qwen/Qwen3-0.6B \ cluster.gpus_per_node=2 \ grpo.max_rollout_turns=5 \ @@ -40,5 +41,5 @@ uv run $PROJECT_ROOT/examples/run_grpo_sliding_puzzle.py \ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path 
$JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'max(data["train/token_mult_prob_error"]) < 1.1' \ + 'max(data["train/token_mult_prob_error"]) < 1.1' diff --git a/tests/functional/grpo_non_colocated.sh b/tests/functional/grpo_non_colocated.sh index 2067779fd4..5f63fb5e9b 100755 --- a/tests/functional/grpo_non_colocated.sh +++ b/tests/functional/grpo_non_colocated.sh @@ -18,7 +18,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_grpo_math.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_grpo_math.py \ policy.model_name=Qwen/Qwen3-0.6B \ grpo.num_prompts_per_step=2 \ grpo.num_generations_per_prompt=4 \ @@ -38,5 +39,5 @@ uv run $PROJECT_ROOT/examples/run_grpo_math.py \ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'max(data["train/token_mult_prob_error"]) < 1.05' \ + 'max(data["train/token_mult_prob_error"]) < 1.05' diff --git a/tests/functional/grpo_rm_env.sh b/tests/functional/grpo_rm_env.sh new file mode 100644 index 0000000000..6e58a3168a --- /dev/null +++ b/tests/functional/grpo_rm_env.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
+# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_grpo_rm.py \ + --config examples/configs/grpo_rm_1B.yaml \ + policy.model_name=Qwen/Qwen2.5-1.5B \ + grpo.num_prompts_per_step=2 \ + grpo.num_generations_per_prompt=4 \ + grpo.max_num_steps=2 \ + policy.train_global_batch_size=4 \ + policy.train_micro_batch_size=1 \ + cluster.gpus_per_node=2 \ + cluster.num_nodes=1 \ + env.reward_model.model_name=Skywork/Skywork-Reward-V2-Qwen3-0.6B \ + env.reward_model.resources.gpus_per_node=1 \ + env.reward_model.resources.num_nodes=1 \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=false \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'max(data["train/token_mult_prob_error"]) < 1.05' + diff --git a/tests/functional/rm.sh b/tests/functional/rm.sh new file mode 100644 index 0000000000..21274e7a96 --- /dev/null +++ b/tests/functional/rm.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +# clean up checkpoint directory on exit +trap "rm -rf /tmp/rm_checkpoints" EXIT + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) 
+# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_rm.py \ + --config examples/configs/rm.yaml \ + cluster.gpus_per_node=2 \ + rm.max_num_steps=3 \ + rm.val_batches=1 \ + rm.val_period=3 \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=true \ + checkpointing.save_period=3 \ + checkpointing.checkpoint_dir=/tmp/rm_checkpoints \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/accuracy"]["3"] > 0.1' diff --git a/tests/functional/sft.sh b/tests/functional/sft.sh index d23151d773..20c5e29479 100755 --- a/tests/functional/sft.sh +++ b/tests/functional/sft.sh @@ -21,7 +21,8 @@ rm -rf $EXP_DIR $LOG_DIR mkdir -p $EXP_DIR $LOG_DIR cd $PROJECT_ROOT -uv run $PROJECT_ROOT/examples/run_sft.py \ +uv run coverage run -a --data-file=$PROJECT_ROOT/tests/.coverage --source=$PROJECT_ROOT/nemo_rl \ + $PROJECT_ROOT/examples/run_sft.py \ policy.model_name=Qwen/Qwen3-0.6B \ cluster.gpus_per_node=2 \ sft.max_num_steps=3 \ @@ -40,5 +41,5 @@ uv run $PROJECT_ROOT/examples/run_sft.py \ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["3"] < 5.9' \ + 'data["train/loss"]["3"] < 5.9' diff --git a/tests/functional/sft_megatron.sh b/tests/functional/sft_megatron.sh new file mode 100755 index 
0000000000..dfb7fcfdba --- /dev/null +++ b/tests/functional/sft_megatron.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +# clean up checkpoint directory on exit +trap "rm -rf /tmp/sft_checkpoints" EXIT + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..) +# Mark the current repo as safe, since wandb fetches metadata about the repo +git config --global --add safe.directory $PROJECT_ROOT + +set -eou pipefail + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +rm -rf $EXP_DIR $LOG_DIR +mkdir -p $EXP_DIR $LOG_DIR + +cd $PROJECT_ROOT +uv run $PROJECT_ROOT/examples/run_sft.py \ + --config $PROJECT_ROOT/examples/configs/recipes/llm/sft-llama3.1-8b-1n8g-megatron.yaml \ + policy.model_name=Qwen/Qwen3-0.6B \ + policy.tokenizer.name=Qwen/Qwen3-0.6B \ + cluster.gpus_per_node=2 \ + sft.max_num_steps=3 \ + sft.val_batches=1 \ + sft.val_period=3 \ + logger.tensorboard_enabled=true \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=false \ + logger.monitor_gpus=true \ + checkpointing.enabled=false \ + policy.megatron_cfg.pipeline_model_parallel_size=1 \ + $@ \ + 2>&1 | tee $RUN_LOG + +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["3"] < 0.8' \ + diff --git a/tests/functional/test_automodel_extra_installed_correctly.sh b/tests/functional/test_automodel_extra_installed_correctly.sh new file mode 100755 index 0000000000..81b1ff124a --- /dev/null +++ b/tests/functional/test_automodel_extra_installed_correctly.sh @@ -0,0 +1,72 @@ +#!/bin/bash +set -eoux pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +cd $SCRIPT_DIR + +uv sync +# Just the first call with --extra automodel is invoked with --reinstall in case submodules were recently 
updated/downloaded +uv run --reinstall --extra automodel --no-build-isolation python <<"EOF" +import torch +import transformers +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + +# Test basic transformers functionality that automodel extends +config = AutoConfig.from_pretrained("microsoft/DialoGPT-small") +print(f"Loaded config: {config.model_type}") + +# Test nemo_automodel import +try: + import nemo_automodel + from nemo_automodel.components._transformers.auto_model import NeMoAutoModelForCausalLM + print("[NeMo Automodel import successful]") +except ImportError as e: + print(f"[WARNING] NeMo Automodel import failed: {e}") + print("[This may be expected if nemo_automodel is not fully built]") + +# Test flash-attn import (part of automodel extra) +try: + import flash_attn + print(f"[Flash Attention available: {flash_attn.__version__}]") +except ImportError: + print("[WARNING] Flash Attention not available") + +# Test vllm import (part of automodel extra) +try: + import vllm + print(f"[vLLM available: {vllm.__version__}]") +except ImportError: + print("[WARNING] vLLM not available") + +print("[Automodel extra dependencies test successful]") +EOF + +# Test that automodel components can be accessed +uv run --extra automodel --no-build-isolation python <<"EOF" +# This must be the first import to get all of the automodel packages added to the path +import nemo_rl + +# Test automodel utilities +try: + from nemo_rl.utils.automodel_checkpoint import detect_checkpoint_format, load_checkpoint, save_checkpoint + print("[Automodel checkpoint utilities import successful]") +except ImportError as e: + print(f"[Automodel checkpoint utilities import failed: {e}]") + +# Test automodel factory +try: + from nemo_rl.models.policy.utils import AUTOMODEL_FACTORY, NEMO_AUTOMODEL_AVAILABLE + print(f"[Automodel factory available: {NEMO_AUTOMODEL_AVAILABLE}]") +except ImportError as e: + print(f"[Automodel factory import failed: {e}]") + +print("[Automodel 
integration test successful]") +EOF + +# Sync just to return the environment to the original base state +uv sync --link-mode symlink --locked --no-install-project +uv sync --link-mode symlink --locked --extra vllm --no-install-project +uv sync --link-mode symlink --locked --extra mcore --no-install-project +uv sync --link-mode symlink --locked --extra automodel --no-install-project +uv sync --link-mode symlink --locked --all-groups --no-install-project +echo Success diff --git a/tests/functional/test_mcore_extra_installed_correctly.sh b/tests/functional/test_mcore_extra_installed_correctly.sh index dfab0828d2..535765c2fe 100755 --- a/tests/functional/test_mcore_extra_installed_correctly.sh +++ b/tests/functional/test_mcore_extra_installed_correctly.sh @@ -37,44 +37,21 @@ EOF uv run --extra mcore --no-build-isolation python <<"EOF" import is_megatron_installed -import is_nemo_installed +import is_megatron_bridge_installed assert is_megatron_installed.INSTALLED, "Megatron is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" -assert is_nemo_installed.INSTALLED, "NeMo is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" +assert is_megatron_bridge_installed.INSTALLED, "Megatron Bridge is not installed. Please check if the submodule has been initialized. 
May need to run `git submodule update --init --recursive`" # This must be the first import to get all of the megatron non-core packages added to the path import nemo_rl import megatron.core from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron.init import initialize_megatron -from nemo.tron.config import ( - ConfigContainer, - TrainingConfig, - LoggerConfig, - OptimizerConfig, - SchedulerConfig, - CheckpointConfig, - DistributedDataParallelConfig, -) -from nemo.tron.utils.common_utils import get_rank_safe -from nemo.tron.config import TokenizerConfig -from nemo.tron.model import get_model_from_config -from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint -from nemo.tron.init import initialize_megatron, set_jit_fusion_options -from nemo.tron.setup import _init_checkpointing_context, _update_model_config_funcs -from nemo.tron.state import GlobalState -from nemo.tron.optim import setup_optimizer -from nemo.tron import fault_tolerance -from nemo.tron.tokenizers.tokenizer import build_tokenizer -from nemo.tron.utils.train_utils import ( - calc_params_l2_norm, - logical_and_across_model_parallel_group, - reduce_max_stat_across_model_parallel_group, -) -from nemo.tron.train import train_step -from nemo.tron.setup import HAVE_FSDP2 -print("[Nemo/Mcore imports successful]") +from megatron.bridge import AutoBridge +print("[Megatron-Core/Megatron-Bridge imports successful]") EOF # Sync just to return the environment to the original base state -uv sync +uv sync --link-mode symlink --locked --no-install-project +uv sync --link-mode symlink --locked --extra vllm --no-install-project +uv sync --link-mode symlink --locked --extra mcore --no-install-project +uv sync --link-mode symlink --locked --all-groups --no-install-project echo Success diff --git a/tests/functional/vlm_grpo.sh b/tests/functional/vlm_grpo.sh new file mode 100755 index 0000000000..79ad6831b5 --- /dev/null +++ b/tests/functional/vlm_grpo.sh @@ -0,0 +1,42 @@ 
+#!/bin/bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
+PROJECT_ROOT=$(realpath $SCRIPT_DIR/../..)
+# Mark the current repo as safe, since wandb fetches metadata about the repo
+git config --global --add safe.directory $PROJECT_ROOT
+
+set -eou pipefail
+
+EXP_NAME=$(basename $0 .sh)
+EXP_DIR=$SCRIPT_DIR/$EXP_NAME
+LOG_DIR=$EXP_DIR/logs
+JSON_METRICS=$EXP_DIR/metrics.json
+RUN_LOG=$EXP_DIR/run.log
+export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-}
+
+rm -rf $EXP_DIR $LOG_DIR
+mkdir -p $EXP_DIR $LOG_DIR
+
+cd $PROJECT_ROOT
+uv run $PROJECT_ROOT/examples/run_vlm_grpo.py \
+    policy.model_name=Qwen/Qwen2.5-VL-3B-Instruct \
+    grpo.num_prompts_per_step=2 \
+    grpo.num_generations_per_prompt=4 \
+    policy.train_global_batch_size=4 \
+    policy.train_micro_batch_size=1 \
+    cluster.gpus_per_node=2 \
+    grpo.max_num_steps=5 \
+    logger.tensorboard_enabled=true \
+    logger.log_dir=$LOG_DIR \
+    logger.wandb_enabled=false \
+    logger.monitor_gpus=true \
+    checkpointing.enabled=false \
+    cluster.num_nodes=1 \
+    $@ \
+    2>&1 | tee $RUN_LOG
+
+uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS
+
+uv run tests/check_metrics.py $JSON_METRICS \
+    'max(data["train/token_mult_prob_error"]) < 1.05' \
+    'mean(data["train/token_mult_prob_error"]) < 1.05'
diff --git a/tests/json_dump_tb_logs.py b/tests/json_dump_tb_logs.py
index 7d2e5607fa..973e37659b 100644
--- a/tests/json_dump_tb_logs.py
+++ b/tests/json_dump_tb_logs.py
@@ -38,16 +38,16 @@
 error_console = Console(stderr=True)
 
 
-def merge_tb_logs_to_json(log_dir, output_path, allow_conflicts=False):
+def merge_tb_logs_to_json(log_dir, output_path, error_on_conflicts=False):
     """Merge multiple TensorBoard event files into a single JSON file.
 
     
Arguments: log_dir: Path to directory containing TensorBoard event files (searched recursively) output_path: Path to save the output JSON file - allow_conflicts: If True, allow multiple values for the same step (last one wins) + error_on_conflicts: If True, raise an error if conflicting values are found for the same step Raises: - ValueError: If conflicting values are found for the same step and allow_conflicts is False + ValueError: If conflicting values are found for the same step and error_on_conflicts is True """ # Find all event files recursively files = glob.glob(f"{log_dir}/**/events*tfevents*", recursive=True) @@ -89,19 +89,19 @@ def merge_tb_logs_to_json(log_dir, output_path, allow_conflicts=False): for scalar in ea.Scalars(metric_name): step, value = scalar.step, scalar.value - # Check for conflicts - immediately raise error if not allowing conflicts + # Check for conflicts - raise error only if error_on_conflicts is True if step in merged_data[metric_name]: existing_value, existing_file = merged_data[metric_name][step] # Only consider it a conflict if the values are different if existing_value != value: - if not allow_conflicts: - # Immediate error if not allowing conflicts + if error_on_conflicts: + # Immediate error if we choose to error on conflicts raise ValueError( f"Conflict detected for metric '{metric_name}' at step {step}:\n" f" File #{file_index_map[existing_file]}: {existing_file} has value {existing_value}\n" f" File #{file_index_map[event_file]}: {event_file} has value {value}\n" - f"Use --allow-conflicts to force merging with latest value." + f"Re-run without --error-on-conflicts to merge with the latest value." 
) # Add or override the value @@ -218,15 +218,15 @@ def merge_tb_logs_to_json(log_dir, output_path, allow_conflicts=False): help="Path to save the output JSON file", ) parser.add_argument( - "--allow-conflicts", + "--error-on-conflicts", action="store_true", - help="Allow conflicting values for the same step (last one wins)", + help="Error out when conflicting values are found for the same step", ) args = parser.parse_args() try: - merge_tb_logs_to_json(args.log_dir, args.output_path, args.allow_conflicts) + merge_tb_logs_to_json(args.log_dir, args.output_path, args.error_on_conflicts) except Exception as e: error_console.print(f"[bold red]Error: {e}[/bold red]") sys.exit(1) diff --git a/tests/run_unit.sh b/tests/run_unit.sh index 5367749199..0366d6864b 100755 --- a/tests/run_unit.sh +++ b/tests/run_unit.sh @@ -32,7 +32,15 @@ export PYTHONPATH=$(realpath ${SCRIPT_DIR}/..):${PYTHONPATH:-} # Run unit tests echo "Running unit tests..." -if ! pytest unit/ "$@"; then +if [[ "$#" -eq 0 ]]; then + pytest_args="unit/" +elif [[ "$1" != unit/* ]]; then + pytest_args="unit/ $@" +else + pytest_args="$@" +fi + +if ! pytest $pytest_args; then echo "[ERROR]: Unit tests failed." exit 1 fi diff --git a/tests/test_suites/README.md b/tests/test_suites/README.md index 0759f06f25..e13b330c05 100644 --- a/tests/test_suites/README.md +++ b/tests/test_suites/README.md @@ -2,20 +2,43 @@ ## Naming -Each test is named: +Base pattern (LLM): + ``` -<algo>-<model>-#n#g-<parallelism>-<opt:long><opt:v$N>.sh +<algo>-<model>-<nodes>n<gpus>g-<strategy-and-params>[-modifiers][-long][.vN].sh ``` +VLM pattern: + +``` +vlm_<algo>-<model>-<nodes>n<gpus>g-<strategy>[-modifiers][.vN].sh +``` + +- **algo**: task or algorithm, e.g., `sft`, `dpo`, `grpo`. +- **model**: model identifier, e.g., `llama3.1-8b-instruct`, `qwen2.5-7b-instruct`. +- **nodes/gpus**: cluster allocation, e.g., `1n8g`, `4n8g`, `8n8g`. 
+- **strategy-and-params**: parallelism or framework detail, e.g., `fsdp2tp1`, `tp4pp2`, `megatron`, `dtensor2tp1`. +- **modifiers** (optional): short flags like `sp` (sequence packing), `actckpt` (activation checkpointing), `fp8`, `noncolocated`, `quick`. +- **-long** (optional): indicates long-running recipe. +- **.vN** (optional): version suffix (e.g., `.v2`, `.v3`) reserved for convergence-impacting changes. Use when the recipe's convergence behavior changes (dataset, loss, convergence bug fix). Pure performance changes do not require a version bump. + Examples: -* sft-llama3.2-1b-1n8g-fsdp2tp1.sh -* grpo-qwen2-1.5B-instruct-4n8g-fsdp2tp2.sh -* grpo-qwen2-1.5B-instruct-4n8g-fsdp2tp2-long.sh -* grpo-qwen2-1.5B-instruct-4n8g-fsdp2tp2-long.v2.sh - * The final verison suffix (starts with `.v2`, `.v3`, ...), is reserved for cases contributors believe the recipe's - convergence has changed due to their commit. Versioning signals that this recipe should not be compared to its - predecessor due to a change in convergence behavior. Examples of this change include: changing dataset, changing loss, - convergence bug fix. Changes affecting performance do not need a version change. + +``` +sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh +dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh +grpo-llama3.1-8b-instruct-1n8g-megatron-fp8.sh +grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh +``` + +Known exceptions currently present: +- Deepscaler recipes encode context length in place of the cluster tuple, e.g., `grpo-deepscaler-1.5b-8K.sh`. These are allowed but should document the intended hardware in the script body. +- Some recipes include additional short flags in the strategy token (e.g., `fsdp2tp8sp`). Treat these as modifiers appended to the strategy. + +Directory placement and naming parity: +- Place driver scripts under `tests/test_suites/llm/` or `tests/test_suites/vlm/`. 
+- The script filename should mirror the YAML recipe filename under `examples/configs/recipes/**` but with a `.sh` suffix. +- Add the relative script path to `tests/test_suites/nightly.txt` for nightly execution. ## Running manually @@ -50,6 +73,10 @@ DRYRUN=1 CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama # Prints Estimated GPUhrs, creates code snapshot, then exits DRYRUN=2 CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh + +# Launch but set extra env vars +EXTRA_ENV="NRL_FORCE_REBUILD_VENVS=true NRL_DEEPSCALER_8K_CKPT=/8k-ckpt NRL_DEEPSCALER_16K_CKPT=/16k-ckpt" \ +CONTAINER=... ACCOUNT=... PARTITION=... ../tools/launch ./llm/sft-llama3.2-1b-1n8g-fsdp2tp1.sh ``` After this completes, you can find the result under diff --git a/tests/test_suites/llm/common.env b/tests/test_suites/llm/common.env index c2008292b9..5bdb8c6b28 100644 --- a/tests/test_suites/llm/common.env +++ b/tests/test_suites/llm/common.env @@ -7,8 +7,7 @@ set -eou pipefail SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) # Mark all repos as safe in the test context, since wandb fetchs metadata about the repo and it's a # catch-22 to get the project root and mark it safe if you don't know the project root -git config --global --add safe.directory "*" -PROJECT_ROOT=$(git rev-parse --show-toplevel) +PROJECT_ROOT=$(realpath $SCRIPT_DIR/../../..) 
exit_if_max_steps_reached() { # Early stopping to save compute if max step has been reached diff --git a/tests/test_suites/llm/dapo-qwen2.5-7b.sh b/tests/test_suites/llm/dapo-qwen2.5-7b.sh new file mode 100755 index 0000000000..c68b52d4b9 --- /dev/null +++ b/tests/test_suites/llm/dapo-qwen2.5-7b.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=16 +STEPS_PER_RUN=20 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["20"] < 1.05' \ + 'data["train/reward"]["20"] > -0.45' \ + 'data["train/filtered_reward"]["20"] > -0.2' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.sh new file mode 100755 index 0000000000..3ef39d91a3 --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.sh @@ -0,0 +1,42 @@ +#!/bin/bash 
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + distillation.val_period=20 \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["10"] < 0.5' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 500' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.sh new file mode 100755 index 0000000000..6710ac87ce --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.sh @@ -0,0 +1,42 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # 
Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + distillation.val_period=20 \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["10"] < 0.5' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 75' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 500' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh new file mode 100755 index 0000000000..52f17c2c28 --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=20 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR 
\ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["20"] < 0.3' \ + 'data["validation/accuracy"]["20"] > 0.1' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 1000' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh new file mode 100755 index 0000000000..cd4b635e72 --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=50 +MAX_STEPS=100 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to 
json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["100"] < 0.25' \ + 'data["validation/accuracy"]["100"] > 0.2' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 1600' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh new file mode 100755 index 0000000000..df8d6daed7 --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=20 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py 
$JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["20"] < 0.3' \ + 'data["validation/accuracy"]["20"] > 0.1' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 1000' +fi diff --git a/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh new file mode 100755 index 0000000000..df8d6daed7 --- /dev/null +++ b/tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=20 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_distillation_math.py \ + --config $CONFIG_PATH \ + distillation.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl-distillation \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 1.5' \ + 'data["train/loss"]["20"] < 0.3' \ + 'data["validation/accuracy"]["20"] > 0.1' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 1000' +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh 
b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh index f5b29b7db7..a8d2d04adc 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh @@ -38,5 +38,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["20"] < 3.4' \ 'data["train/preference_loss"]["1"] > 0.69314' \ 'data["train/preference_loss"]["1"] < 0.69316' \ - 'data["train/preference_loss"]["20"] < 0.6' -fi + 'data["train/preference_loss"]["20"] < 0.6' \ + 'mean(data["timing/train/total_step_time"], -10, -1) < 7.8' +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh similarity index 91% rename from tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.sh rename to tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh index e9ccb1e147..fbda6865f5 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh @@ -38,5 +38,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["150"] < 3.0' \ 'data["train/preference_loss"]["1"] > 0.69314' \ 'data["train/preference_loss"]["1"] < 0.69316' \ - 'data["train/preference_loss"]["150"] < 0.4' -fi + 'data["train/preference_loss"]["150"] < 0.4' \ + 'mean(data["timing/train/total_step_time"], -11, -1) < 24' +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh similarity index 91% rename from tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh rename to tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh index e9ccb1e147..7cc74e26df 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh +++ 
b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh @@ -38,5 +38,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["150"] < 3.0' \ 'data["train/preference_loss"]["1"] > 0.69314' \ 'data["train/preference_loss"]["1"] < 0.69316' \ - 'data["train/preference_loss"]["150"] < 0.4' -fi + 'data["train/preference_loss"]["150"] < 0.4' \ + 'mean(data["timing/train/total_step_time"], -11, -1) < 11.5' +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh index f5b29b7db7..497e0b8f68 100755 --- a/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh +++ b/tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh @@ -38,5 +38,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'data["train/loss"]["20"] < 3.4' \ 'data["train/preference_loss"]["1"] > 0.69314' \ 'data["train/preference_loss"]["1"] < 0.69316' \ - 'data["train/preference_loss"]["20"] < 0.6' -fi + 'data["train/preference_loss"]["20"] < 0.6' \ + 'mean(data["timing/train/total_step_time"], -10) < 6.7' +fi diff --git a/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh new file mode 100755 index 0000000000..a6beabb886 --- /dev/null +++ b/tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh @@ -0,0 +1,42 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=150 +MAX_STEPS=150 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=45 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_dpo.py \ + --config $CONFIG_PATH \ + dpo.max_num_steps=$MAX_STEPS \ + 
logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/sft_loss"]["1"] < 0.00001' \ + 'data["train/sft_loss"]["150"] < 0.00001' \ + 'data["train/preference_loss"]["1"] > 0.6930' \ + 'data["train/preference_loss"]["1"] < 0.6932' \ + 'data["train/preference_loss"]["150"] < 0.68' +fi diff --git a/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh index 6606099df7..0b0c67b312 100755 --- a/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh +++ b/tests/test_suites/llm/dpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v2.sh @@ -36,5 +36,6 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma uv run tests/check_metrics.py $JSON_METRICS \ 'data["train/loss"]["1"] > 0.69314' \ 'data["train/loss"]["1"] < 0.69316' \ - 'data["train/loss"]["150"] < 0.55' -fi + 'data["train/loss"]["150"] < 0.55' \ + 'mean(data["timing/train/total_step_time"], -11, -1) < 1.3' +fi diff --git a/tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh.disabled b/tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh.disabled new file mode 100755 index 0000000000..3466de2fce --- /dev/null +++ b/tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh.disabled @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( 
dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=100 +MAX_STEPS=100 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_dpo.py \ + --config $CONFIG_PATH \ + dpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] > 0.6990' \ + 'data["train/loss"]["1"] < 0.6992' \ + 'data["train/loss"]["100"] < 0.60' +fi diff --git a/tests/test_suites/llm/grpo-dapomath17k-dsv3-megatron.sh b/tests/test_suites/llm/grpo-dapomath17k-dsv3-megatron.sh new file mode 100755 index 0000000000..3522261d9c --- /dev/null +++ b/tests/test_suites/llm/grpo-dapomath17k-dsv3-megatron.sh @@ -0,0 +1,46 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=32 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Use the DeepSeek-V3 checkpoint converted to BF16. 
+if [[ -z "$NRL_DEEPSEEK_V3_BF16_CKPT" ]]; then + echo "Need to set NRL_DEEPSEEK_V3_BF16_CKPT to the path of DeepSeek-V3 checkpoint converted to BF16. See docs/guides/deepseek.md for more details." + exit 1 +fi + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + policy.model_name=$NRL_DEEPSEEK_V3_BF16_CKPT \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=False \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'min(data["train/token_mult_prob_error"]) < 1.05' \ + 'data["train/reward"]["10"] > 0.4' +fi diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh new file mode 100755 index 0000000000..633b0d8297 --- /dev/null +++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh @@ -0,0 +1,68 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=20 +MAX_STEPS=20 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Use checkpoint created from the 8K checkpoint in grpo-deepscaler-1.5b-8K.sh +if [[ -z "$NRL_DEEPSCALER_8K_CKPT" ]]; then + echo "Need to set NRL_DEEPSCALER_8K_CKPT to the path to the trained 8K checkpoint" + exit 1 +fi + +# Run the experiment +cd $PROJECT_ROOT 
+uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + policy.model_name=$NRL_DEEPSCALER_8K_CKPT \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.05' \ + "data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.05" +fi + +# Convert 16k checkpoint +uv run examples/converters/convert_dcp_to_hf.py \ + --config=$CKPT_DIR/step_${MAX_STEPS}/config.yaml \ + --dcp-ckpt-path=$CKPT_DIR/step_${MAX_STEPS}/policy/weights \ + --hf-ckpt-path=$CKPT_DIR/grpo-deepscaler-16k-${MAX_STEPS}-hf + +# Run eval +uv run examples/run_eval.py \ + generation.model_name=$CKPT_DIR/grpo-deepscaler-16k-${MAX_STEPS}-hf \ + data.prompt_file=examples/prompts/cot.txt \ + generation.vllm_cfg.max_model_len=32768 \ + generation.vllm_cfg.enforce_eager=True \ + generation.temperature=1.0 \ + eval.num_tests_per_prompt=16 \ + 2>&1 | tee ${RUN_LOG}.aime-16k + +cat ${RUN_LOG}.aime-16k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-16k-metric.json + +# 240 step checkpoint 0.3 +uv run tests/check_metrics.py ${RUN_LOG}-16k-metric.json \ + 'data["score"] >= 0.2396' diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh new file mode 100755 index 0000000000..87b6e9065c --- /dev/null +++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh @@ -0,0 +1,67 @@ 
+#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Use checkpoint created from the 16K checkpoint in grpo-deepscaler-1.5b-16K.sh +if [[ -z "$NRL_DEEPSCALER_16K_CKPT" ]]; then + echo "Need to set NRL_DEEPSCALER_16K_CKPT to the path to the trained 16K checkpoint" + exit 1 +fi + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + policy.model_name=$NRL_DEEPSCALER_16K_CKPT \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.05' \ + "data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.05" +fi + +# Convert 24k checkpoint +uv run examples/converters/convert_dcp_to_hf.py \ + --config=$CKPT_DIR/step_${MAX_STEPS}/config.yaml \ + --dcp-ckpt-path=$CKPT_DIR/step_${MAX_STEPS}/policy/weights \ + --hf-ckpt-path=$CKPT_DIR/grpo-deepscaler-24k-${MAX_STEPS}-hf + +# Run eval +uv run examples/run_eval.py \ + generation.model_name=$CKPT_DIR/grpo-deepscaler-24k-${MAX_STEPS}-hf \ + data.prompt_file=examples/prompts/cot.txt \ + generation.vllm_cfg.max_model_len=32768 \ + 
generation.vllm_cfg.enforce_eager=True \ + generation.temperature=1.0 \ + eval.num_tests_per_prompt=16 \ + 2>&1 | tee ${RUN_LOG}.aime-24k + +cat ${RUN_LOG}.aime-24k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-24k-metric.json + +uv run tests/check_metrics.py ${RUN_LOG}-24k-metric.json \ + 'data["score"] >= 0.2396' diff --git a/tests/test_suites/llm/grpo-deepscaler-1.5b-8K.sh b/tests/test_suites/llm/grpo-deepscaler-1.5b-8K.sh new file mode 100755 index 0000000000..ba2f5993d4 --- /dev/null +++ b/tests/test_suites/llm/grpo-deepscaler-1.5b-8K.sh @@ -0,0 +1,123 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=40 +MAX_STEPS=40 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.05' \ + "data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.05" +fi + +# Convert 8k checkpoint +uv run examples/converters/convert_dcp_to_hf.py \ + --config=$CKPT_DIR/step_${MAX_STEPS}/config.yaml \ + 
--dcp-ckpt-path=$CKPT_DIR/step_${MAX_STEPS}/policy/weights \ + --hf-ckpt-path=$CKPT_DIR/grpo-deepscaler-8k-${MAX_STEPS}-hf + +# Run eval +uv run examples/run_eval.py \ + generation.model_name=$CKPT_DIR/grpo-deepscaler-8k-${MAX_STEPS}-hf \ + data.prompt_file=examples/prompts/cot.txt \ + generation.vllm_cfg.max_model_len=32768 \ + generation.vllm_cfg.enforce_eager=True \ + generation.temperature=1.0 \ + eval.num_tests_per_prompt=16 \ + 2>&1 | tee ${RUN_LOG}.aime-8k + +cat ${RUN_LOG}.aime-8k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-8k-metric.json + +# 0.2 is the baseline score for AIME on the base checkpoint +uv run tests/check_metrics.py ${RUN_LOG}-8k-metric.json \ + 'data["score"] >= 0.2396' + +# This comment is for reference on how the aime24 eval baseline was chosen: +# The variance in aime24 is pretty high when only taking one sample per prompt. +# I have observed huge variance even between A100 and H100 with one sample per prompt, +# and even 2-3% difference with 16 prompts. Anecdotally, when there is something wrong +# with logprob error, the accuracy can fall below even the starting checkpoint. For that +# reason, all the deepscaler recipes compare against 0.2396 and use 16 generations per +# prompt to mitigate the variance. +# +# Additionally, 16 generations is about 12 minutes, so that should be factored into +# the overall time to run the test. 
+######################################################## +# deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +######################################################## +# num_tests_per_prompt=1 +# score=0.2333 +# real 3m9.173s +# num_tests_per_prompt=5 +# score=0.2267 +# real 4m50.247s +# num_tests_per_prompt=10 +# score=0.2367 +# real 8m1.174s +# num_tests_per_prompt=16 +# score=0.2396 +# real 11m46.489s + +######################################################## +# grpo-deepscaler-8k-240-hf +######################################################## +# num_tests_per_prompt=1 +# score=0.2667 +# num_tests_per_prompt=5 +# score=0.3267 +# num_tests_per_prompt=10 +# score=0.3367 +# num_tests_per_prompt=16 +# score=0.2833 + +######################################################## +# grpo-deepscaler-16k-290-hf +######################################################## +# num_tests_per_prompt=1 +# score=0.2000 +# num_tests_per_prompt=5 +# score=0.3267 +# num_tests_per_prompt=10 +# score=0.3167 +# num_tests_per_prompt=16 +# score=0.3271 + +######################################################## +# grpo-deepscaler-24k-100-hf +######################################################## +# num_tests_per_prompt=1 +# score=0.3000 +# num_tests_per_prompt=5 +# score=0.3333 +# num_tests_per_prompt=10 +# score=0.3700 +# num_tests_per_prompt=16 +# score=0.3396 diff --git a/tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh index aea8c91747..4624b7282d 100755 --- a/tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh +++ b/tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh @@ -4,8 +4,8 @@ source $SCRIPT_DIR/common.env # ===== BEGIN CONFIG ===== NUM_NODES=1 -STEPS_PER_RUN=500 -MAX_STEPS=500 +STEPS_PER_RUN=400 +MAX_STEPS=400 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up NUM_MINUTES=120 # ===== END CONFIG ===== @@ -35,5 +35,6 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path 
$JSON_METRICS if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ 'mean(data["train/token_mult_prob_error"]) < 1.1' \ - 'data["train/token_mult_prob_error"]["500"] < 1.1' + "data[\"train/token_mult_prob_error\"][\"${MAX_STEPS}\"] < 1.1" \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 14' fi diff --git a/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh b/tests/test_suites/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.sh similarity index 99% rename from tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh rename to tests/test_suites/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.sh index 69c9899ccd..a6ce1800d9 100755 --- a/tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh +++ b/tests/test_suites/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["20"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-gspo-deepscaler-1.5b-8K.sh b/tests/test_suites/llm/grpo-gspo-deepscaler-1.5b-8K.sh new file mode 100755 index 0000000000..ce2adb1c51 --- /dev/null +++ b/tests/test_suites/llm/grpo-gspo-deepscaler-1.5b-8K.sh @@ -0,0 +1,67 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=40 +MAX_STEPS=40 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + 
logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + "data['train/token_mult_prob_error']['$MAX_STEPS'] < 1.1" +fi + +# TODO: enable in subsequent PR to do a quick accuracy check +## Convert 8k checkpoint +#uv run examples/converters/convert_dcp_to_hf.py \ +# --config=$CKPT_DIR/step_${MAX_STEPS}/config.yaml \ +# --dcp-ckpt-path=$CKPT_DIR/step_${MAX_STEPS}/policy/weights \ +# --hf-ckpt-path=$CKPT_DIR/gspo-deepscaler-8k-${MAX_STEPS}-hf +# +## Run eval +#uv run examples/run_eval.py \ +# generation.model_name=$CKPT_DIR/gspo-deepscaler-8k-${MAX_STEPS}-hf \ +# data.prompt_file=examples/prompts/cot.txt \ +# generation.vllm_cfg.max_model_len=32768 2>&1 | tee ${RUN_LOG}.aime-8k +# +#cat ${RUN_LOG}.aime-8k | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-8k-metric.json +# +#uv run tests/check_metrics.py ${RUN_LOG}-8k-metric.json \ +# 'data["score"] >= 0.25' \ +# +##uv run examples/run_eval.py \ +## generation.model_name=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B \ +## data.prompt_file=examples/prompts/cot.txt \ +## generation.vllm_cfg.max_model_len=32768 2>&1 | tee ${RUN_LOG}.aime-baseline +# +##cat ${RUN_LOG}.aime-baseline | grep "score=" | sed 's/.*score=\([^ ]*\).*/{"score": \1}/' > ${RUN_LOG}-baseline-metric.json +# +##uv run tests/check_metrics.py ${RUN_LOG}-baseline-metric.json \ +## 'data["score"] == 0.2' \ diff --git 
a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.sh new file mode 100755 index 0000000000..de177d2c65 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=100 +MAX_STEPS=100 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["100"] < 1.1' +fi diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.sh new file mode 100755 index 0000000000..956c94bb5c --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source 
$SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=100 +MAX_STEPS=100 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + # With a few number of steps the logprob can have spikes that can move the average up. 
+ uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"], ignore_top_p=0.05) < 1.1' \ + 'ratio_above(data["train/token_mult_prob_error"], 1.1) < 0.1' + # ratio_above @ 1.1 was 0.03,0.06,0.05: 3sigma ~=0.1 +fi diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh new file mode 100755 index 0000000000..d018032576 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' +fi diff --git a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh index 
6e64876058..af44d060cb 100755 --- a/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh +++ b/tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["100"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh index 45cfad6e83..562ff730e7 100755 --- a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh +++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh @@ -35,6 +35,6 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ 'mean(data["train/token_mult_prob_error"]) < 1.1' \ - 'data["train/token_mult_prob_error"]["500"] < 1.1' + 'data["train/token_mult_prob_error"]["500"] < 1.1' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 10' fi - diff --git a/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh new file mode 100755 index 0000000000..90e309e128 --- /dev/null +++ b/tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=500 +MAX_STEPS=500 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + 
grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["500"] < 1.1' \ + 'data["train/reward"]["500"] > 0.1' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 10.5' +fi diff --git a/tests/test_suites/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.sh.disabled b/tests/test_suites/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.sh.disabled new file mode 100755 index 0000000000..9420b53c9d --- /dev/null +++ b/tests/test_suites/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.sh.disabled @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=2 # 40min: step_time: [1341, 801] +MAX_STEPS=2 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=30 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + 
checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["2"] < 1.1' \ + 'mean(data["timing/train/policy_training"]) < 280' \ + 'mean(data["ray/node.0.gpu.0.mem_gb"]) < 75' +fi diff --git a/tests/test_suites/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.sh b/tests/test_suites/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.sh new file mode 100755 index 0000000000..4a310b673b --- /dev/null +++ b/tests/test_suites/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=3 +MAX_STEPS=3 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; 
then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["3"] < 1.1' +fi diff --git a/tests/test_suites/llm/grpo-moonlight-16ba3b-4n8g-megatron.sh b/tests/test_suites/llm/grpo-moonlight-16ba3b-4n8g-megatron.sh new file mode 100755 index 0000000000..24e49d1a8d --- /dev/null +++ b/tests/test_suites/llm/grpo-moonlight-16ba3b-4n8g-megatron.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +PYTHONPATH=$HF_HOME/modules:$PYTHONPATH uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' \ + 'mean(data["train/reward"]) > 0.45' \ + 'mean(data["timing/train/total_step_time"], -11, -1) < 70' +fi diff --git a/tests/test_suites/llm/grpo-nano-v2-12b-1n8g-megatron.sh b/tests/test_suites/llm/grpo-nano-v2-12b-1n8g-megatron.sh new file mode 100755 index 
0000000000..68a694098c --- /dev/null +++ b/tests/test_suites/llm/grpo-nano-v2-12b-1n8g-megatron.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.05' \ + 'data["train/token_mult_prob_error"]["30"] < 1.05' \ + 'data["train/reward"]["30"] > 0.4' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 80' +fi diff --git a/tests/test_suites/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.sh b/tests/test_suites/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.sh new file mode 100755 index 0000000000..d1ad766b5b --- /dev/null +++ b/tests/test_suites/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up 
+NUM_MINUTES=60 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.05' \ + 'data["train/token_mult_prob_error"]["30"] < 1.05' \ + 'data["train/reward"]["30"] > 0.4' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 60' +fi diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.sh similarity index 99% rename from tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh rename to tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.sh index 7d1fdc2858..fa7fbd5bd6 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["20"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.sh similarity index 99% rename 
from tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh rename to tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.sh index e96f3de7fa..98591ba9b3 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["2"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh similarity index 99% rename from tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh rename to tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh index b3071fb58e..ec613562f2 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh @@ -37,4 +37,3 @@ if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | ma 'mean(data["train/token_mult_prob_error"]) < 1.1' \ 'data["train/token_mult_prob_error"]["30"] < 1.1' fi - diff --git a/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.sh b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.sh new file mode 100755 index 0000000000..45f354043a --- /dev/null +++ b/tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.sh @@ -0,0 +1,41 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config 
$CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' \ + 'mean(data["train/reward"]) > 0.56' \ + 'mean(data["timing/train/total_step_time"], 2) < 50' +fi diff --git a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh index 98df00c25c..35810c4eec 100755 --- a/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh +++ b/tests/test_suites/llm/grpo-qwen2.5-math-1.5b-instruct-1n8g-fsdp2tp1.v3.sh @@ -35,6 +35,6 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ 'mean(data["train/token_mult_prob_error"]) < 1.1' \ - 'data["train/token_mult_prob_error"]["450"] < 1.1' + 'data["train/token_mult_prob_error"]["450"] < 1.1' \ + 'mean(data["timing/train/total_step_time"], 2) < 25' fi - diff --git a/tests/test_suites/llm/grpo-qwen3-30ba3b-8n8g-megatron.sh b/tests/test_suites/llm/grpo-qwen3-30ba3b-8n8g-megatron.sh new file mode 100755 index 0000000000..f89041cd40 --- /dev/null +++ 
b/tests/test_suites/llm/grpo-qwen3-30ba3b-8n8g-megatron.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=30 +MAX_STEPS=30 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/token_mult_prob_error"]["30"] < 1.1' \ + 'data["train/reward"]["30"] > 0.43' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 220' +fi diff --git a/tests/test_suites/llm/performance/common.env b/tests/test_suites/llm/performance/common.env new file mode 100644 index 0000000000..99b7e9ecfd --- /dev/null +++ b/tests/test_suites/llm/performance/common.env @@ -0,0 +1,45 @@ +#!/bin/bash +# Source this file before running test to setup +# +# source ./common.env +set -eou pipefail + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +# Mark all repos as safe in the test context, since wandb fetchs metadata about the repo and it's a +# catch-22 to get the project root and mark it safe if you don't know the project root +PROJECT_ROOT=$(realpath 
$SCRIPT_DIR/../../../..) + +exit_if_max_steps_reached() { + # Early stopping to save compute if max step has been reached + STEPS_SO_FAR=$(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS || echo 0) + if [[ $STEPS_SO_FAR -ge $MAX_STEPS ]]; then + echo "[INFO] Target step $MAX_STEPS reached, skipping run" + exit 0 + fi + echo "[INFO] Steps so far: $STEPS_SO_FAR, running till $MAX_STEPS steps" +} + +EXP_NAME=$(basename $0 .sh) +EXP_DIR=$SCRIPT_DIR/$EXP_NAME +LOG_DIR=$EXP_DIR/logs +CKPT_DIR=$EXP_DIR/ckpts +JSON_METRICS=$EXP_DIR/metrics.json +RUN_LOG=$EXP_DIR/run.log + +# Test script has path: tests/test_suites/llm/${EXP_NAME}.sh +# where config has path: examples/configs/recipes/llm/${EXP_NAME}.yaml +# We will assume/check the path matches this pattern +CONFIG_PATH=$(echo $SCRIPT_DIR/${EXP_NAME}.yaml | sed 's#tests/test_suites#examples/configs/recipes#') +if [[ ! -f $CONFIG_PATH ]]; then + echo "[ERROR] Config file $CONFIG_PATH not found" + exit 1 +fi + +export PYTHONPATH=${PROJECT_ROOT}:${PYTHONPATH:-} + +if [[ -n "${TEST_DRYRUN:-}" ]]; then + echo "[INFO] TEST_DRYRUN mode: used for testing" + exit +fi + +mkdir -p $EXP_DIR $LOG_DIR $CKPT_DIR diff --git a/tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh b/tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh new file mode 100755 index 0000000000..738b38dd5b --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh @@ -0,0 +1,45 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env +# disable NVLS to avoid OOM issue +export NCCL_NVLS_ENABLE=0 +# allow user to pass an existing HF checkpoint path based on instruction in https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/deepseek.md +export MODEL_NAME=${NRL_DEEPSEEK_V3_HF_CKPT:-"unsloth/DeepSeek-V3-0324-BF16"} + +# ===== BEGIN CONFIG ===== +NUM_NODES=32 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( 
(MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + policy.model_name=$MODEL_NAME \ + policy.tokenizer.name=$MODEL_NAME \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-deepseek-v3-64n8g-async-1off.sh b/tests/test_suites/llm/performance/grpo-deepseek-v3-64n8g-async-1off.sh new file mode 100755 index 0000000000..14138486e1 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-deepseek-v3-64n8g-async-1off.sh @@ -0,0 +1,45 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env +# disable NVLS to avoid OOM issue +export NCCL_NVLS_ENABLE=0 +# allow user to pass an existing HF checkpoint path based on instruction in https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/deepseek.md +export MODEL_NAME=${NRL_DEEPSEEK_V3_HF_CKPT:-"unsloth/DeepSeek-V3-0324-BF16"} + +# ===== BEGIN CONFIG ===== +NUM_NODES=64 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== 
END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + policy.model_name=$MODEL_NAME \ + policy.tokenizer.name=$MODEL_NAME \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + 
logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh new file mode 100755 index 0000000000..e7636f3e93 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run 
tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-235b-16n8g.sh b/tests/test_suites/llm/performance/grpo-qwen3-235b-16n8g.sh new file mode 100755 index 0000000000..0f9bf9289f --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-235b-16n8g.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env +# disable NVLS to avoid OOM issue +export NCCL_NVLS_ENABLE=0 + +# ===== BEGIN CONFIG ===== +NUM_NODES=16 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh new file mode 100755 index 0000000000..f7dac553af --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh @@ -0,0 +1,40 
@@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env +# disable NVLS to avoid OOM issue +export NCCL_NVLS_ENABLE=0 + +# ===== BEGIN CONFIG ===== +NUM_NODES=32 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh new file mode 100755 index 0000000000..0de5a124ed --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run 
examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh new file mode 100755 index 0000000000..0de5a124ed --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path 
$JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-4n8g.sh b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n8g.sh new file mode 100755 index 0000000000..0de5a124ed --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-4n8g.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=4 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh 
b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh new file mode 100755 index 0000000000..35d58c98f7 --- /dev/null +++ b/tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=10 +MAX_STEPS=10 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=100 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_grpo_math.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'mean(data["train/token_mult_prob_error"]) < 1.1' \ + 'data["train/token_mult_prob_error"]["10"] < 1.1' +fi diff --git a/tests/test_suites/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.sh b/tests/test_suites/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.sh new file mode 100755 index 0000000000..718322e33a --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.sh @@ -0,0 +1,42 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=8 +STEPS_PER_RUN=300 +MAX_STEPS=300 +NUM_RUNS=$(( (MAX_STEPS + 
STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=240 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# TODO: the memory check is known to OOM. see https://github.com/NVIDIA-NeMo/RL/issues/263 +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 0.55' \ + 'data["train/loss"]["300"] < 0.285' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ + 'mean(data["timing/train/total_step_time"], 2) < 20' +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh similarity index 81% rename from tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh rename to tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh index b22c00dec0..d5dfde39b9 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh @@ -2,11 +2,10 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) source $SCRIPT_DIR/common.env -# TODO: @ashors real convergence run (dataset only has 2737) # ===== BEGIN CONFIG ===== NUM_NODES=1 -STEPS_PER_RUN=2730 -MAX_STEPS=2730 +STEPS_PER_RUN=250 +MAX_STEPS=250 
NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up NUM_MINUTES=120 # ===== END CONFIG ===== @@ -35,9 +34,9 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # TODO: the memory check is known to OOM. see https://github.com/NVIDIA-NeMo/RL/issues/263 # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then - # TODO: FIGURE OUT CORRECT METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 5' \ - 'data["train/loss"]["2730"] < 0.3' \ - 'max(data["ray/node.0.gpu.0.mem_gb"]) < 50' + 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["250"] < 0.36' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 10' fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh new file mode 100755 index 0000000000..4b243e8fe9 --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh @@ -0,0 +1,42 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=250 +MAX_STEPS=250 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=120 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py 
$LOG_DIR --output_path $JSON_METRICS + +# TODO: the memory check is known to OOM. see https://github.com/NVIDIA-NeMo/RL/issues/263 +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["250"] < 0.36' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 80' \ + 'mean(data["timing/train/total_step_time"], 2) < 22' +fi diff --git a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.sh similarity index 83% rename from tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh rename to tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.sh index abed80e5ed..87ca1e9dad 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.sh @@ -4,8 +4,8 @@ source $SCRIPT_DIR/common.env # ===== BEGIN CONFIG ===== NUM_NODES=1 -STEPS_PER_RUN=350 -MAX_STEPS=350 +STEPS_PER_RUN=50 +MAX_STEPS=50 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up NUM_MINUTES=45 # ===== END CONFIG ===== @@ -35,9 +35,9 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then - # TODO: FIGURE OUT CORRECT METRICS uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 5' \ - 'data["train/loss"]["350"] < 0.5' \ - 'max(data["ray/node.0.gpu.0.mem_gb"]) < 45' -fi + 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["50"] < 0.38' \ + 'max(data["ray/node.0.gpu.0.mem_gb"]) < 70' \ + 'mean(data["timing/train/total_step_time"], 2) < 32' +fi diff --git 
a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-megatron.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh similarity index 87% rename from tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-megatron.sh rename to tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh index cf72bd9377..e063b39861 100755 --- a/tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-megatron.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh @@ -31,9 +31,9 @@ uv run examples/run_sft.py \ # Convert tensorboard logs to json uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS -# TODO: @ashors tighter bounds if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 2' \ - 'data["train/loss"]["250"] < 0.3' -fi \ No newline at end of file + 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["250"] < 0.36' \ + 'mean(data["timing/train/total_step_time"], 2) < 6' +fi diff --git a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh similarity index 81% rename from tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh rename to tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh index 32c66dae04..8ef0dfafe6 100755 --- a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh +++ b/tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh @@ -4,10 +4,10 @@ source $SCRIPT_DIR/common.env # ===== BEGIN CONFIG ===== NUM_NODES=1 -STEPS_PER_RUN=500 -MAX_STEPS=500 +STEPS_PER_RUN=250 +MAX_STEPS=250 NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up -NUM_MINUTES=15 +NUM_MINUTES=120 # ===== END CONFIG ===== exit_if_max_steps_reached @@ -31,11 +31,9 @@ uv run examples/run_sft.py \ # Convert tensorboard logs to json uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS -# 
Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 2.4' \ - 'data["train/loss"]["500"] < 0.5' \ - 'max(data["ray/node.0.gpu.0.mem_gb"]) < 25' + 'data["train/loss"]["1"] < 0.6' \ + 'data["train/loss"]["250"] < 0.36' \ + 'mean(data["timing/train/total_step_time"], 2) < 20' fi - diff --git a/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh new file mode 100755 index 0000000000..b5edc8043e --- /dev/null +++ b/tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh @@ -0,0 +1,43 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=250 +MAX_STEPS=250 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=15 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["1"] < 0.82' \ + 'mean(data["train/loss"],-10,-1) < 0.58' \ + 
'max(data["ray/node.0.gpu.0.mem_gb"]) < 25' \ + 'mean(data["timing/train/total_step_time"], -6, -1) < 0.7' + # mean(data["train/loss"],-10,-1) observed to be 0.5557474825117323 + # timing/train/total_step_time observed 0.6-0.64 +fi \ No newline at end of file diff --git a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh similarity index 93% rename from tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh rename to tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh index 257add6fc5..3b987df72b 100755 --- a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh +++ b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh @@ -37,7 +37,7 @@ uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS # Only run metrics if the target step is reached if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then uv run tests/check_metrics.py $JSON_METRICS \ - 'data["train/loss"]["1"] < 1.5' \ - 'data["train/loss"]["20"] < 0.3' \ + 'data["train/loss"]["1"] < 0.37' \ + 'mean(data["train/loss"], 16) < 0.31' \ 'max(data["ray/node.0.gpu.0.mem_gb"]) < 35' -fi +fi diff --git a/tests/test_suites/llm/sft-qwen2.5-math7b-2n8g-megatron.sh b/tests/test_suites/llm/sft-qwen2.5-math7b-2n8g-megatron.sh new file mode 100755 index 0000000000..897f4fbb60 --- /dev/null +++ b/tests/test_suites/llm/sft-qwen2.5-math7b-2n8g-megatron.sh @@ -0,0 +1,43 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# TODO: this config can crash on OOM +# https://github.com/NVIDIA-NeMo/RL/issues/263 + +# ===== BEGIN CONFIG ===== +NUM_NODES=2 +STEPS_PER_RUN=80 # step_time ~ 29sec +MAX_STEPS=80 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=30 +# ===== END CONFIG 
===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_sft.py \ + --config $CONFIG_PATH \ + sft.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + ~policy.tokenizer.chat_template \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["80"] < 0.301' \ + 'data["validation/val_loss"]["80"] < 0.304' +fi diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt index d28e61a8e6..ea353d1131 100644 --- a/tests/test_suites/nightly.txt +++ b/tests/test_suites/nightly.txt @@ -8,26 +8,71 @@ tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-fsdp2tp1.v3.sh tests/test_suites/llm/grpo-gemma3-1b-it-1n8g-fsdp2tp1.sh # Dtensor (Qwen/Qwen2.5-7B-Instruct) -tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4sp.v3.sh +tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-fsdp2tp4.v3.sh + +# Megatron +tests/test_suites/llm/grpo-llama3.2-1b-instruct-1n8g-megatron.sh # Functional 32b run -tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt.v3.sh +tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt.v3.sh + +# Functional moonlight run +tests/test_suites/llm/grpo-moonlight-16ba3b-4n8g-megatron.sh + +# Functional VLM run +tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.sh +tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.sh + +# Removing this until this issue is 
resolved: https://github.com/huggingface/transformers/issues/41190 +# tests/test_suites/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.sh + +# Deepscaler (short tests) +tests/test_suites/llm/grpo-deepscaler-1.5b-16K.sh +tests/test_suites/llm/grpo-deepscaler-1.5b-24K.sh +tests/test_suites/llm/grpo-deepscaler-1.5b-8K.sh + +# Deepscaler (GSPO) +tests/test_suites/llm/grpo-gspo-deepscaler-1.5b-8K.sh + +# GRPO math test run (32K context mcore) +tests/test_suites/llm/grpo-math-qwen3-30ba3b-megatron-tp4-32k.sh + +# FP8 +tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-rollouts.v3.sh +tests/test_suites/llm/grpo-llama3.1-8b-instruct-1n8g-megatron-fp8-e2e.sh + +# Non-colocated +tests/test_suites/llm/grpo-llama3.1-8b-instruct-2n8g-fsdp2tp1-noncolocated.sh + +# Nemotron Super 49B +#https://github.com/NVIDIA-NeMo/RL/issues/1374 +#tests/test_suites/llm/grpo-math-llama-nemotron-super-49b-v.5-4n8g-fsdp2tp8.sh + +# Nano-v2 +tests/test_suites/llm/grpo-nano-v2-12b-1n8g-megatron.sh +tests/test_suites/llm/grpo-nano-v2-12b-2n8g-fsdp2tp1.sh ####### # SFT # ####### # 1N 1B/8B runs -tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v2.sh +tests/test_suites/llm/sft-llama3.2-1b-1n8g-fsdp2tp1.v3.sh # Dtensor (8B) -tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp2sp.v2.sh +tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.sh +# dynamic batching +tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh # Functional 32b test -tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v2.sh +tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh # Megatron -tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-megatron.sh +tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh +# sequence packing +tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron-seqpack.sh +# validate TP/DP +tests/test_suites/llm/sft-qwen2.5-math7b-2n8g-megatron.sh ####### # DPO # @@ -41,3 +86,18 @@ 
tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.sh # Short megatron tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatrontp2pp2-quick.sh + +# Long dtensor +# Disabling until transformers upgraded to >=4.56 +# Issue with details: https://github.com/NVIDIA-NeMo/RL/issues/1343 +# tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh + +################ +# Distillation # +################ + +# Distillation tests +tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-fsdp2tp1.v1.sh + +# Short megatron +tests/test_suites/llm/distillation-qwen3-32b-to-1.7b-base-1n8g-megatron-tp2pp2cp2-pack.sh diff --git a/tests/test_suites/nightly_performance.txt b/tests/test_suites/nightly_performance.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/test_suites/performance.txt b/tests/test_suites/performance.txt new file mode 100644 index 0000000000..728159945d --- /dev/null +++ b/tests/test_suites/performance.txt @@ -0,0 +1,15 @@ +######## +# GRPO # +######## + +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g.sh +tests/test_suites/llm/performance/grpo-deepseek-v3-32n8g.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-4n8g.sh +tests/test_suites/llm/performance/grpo-qwen3-235b-16n8g.sh + +tests/test_suites/llm/performance/grpo-deepseek-v3-64n8g-async-1off.sh +tests/test_suites/llm/performance/grpo-llama3.1-8b-instruct-2n8g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-32b-8n8g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-235b-32n8g-async-1off.sh +tests/test_suites/llm/performance/grpo-qwen3-30ba3b-4n8g-async-1off.sh \ No newline at end of file diff --git a/tests/test_suites/release.txt b/tests/test_suites/release.txt index e339ef0bc1..43de424107 100644 --- a/tests/test_suites/release.txt +++ b/tests/test_suites/release.txt @@ -2,26 +2,54 @@ # GRPO # ######## +# Megatron 
(Qwen/Qwen2.5-7B-Instruct) +tests/test_suites/llm/grpo-qwen2.5-7b-instruct-4n8g-megatron.sh + # Long 8b run tests/test_suites/llm/grpo-llama3.1-8b-instruct-4n8g-fsdp2tp1-long.v3.sh # Long 32b run -tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8sp-actckpt-long.v3.sh +tests/test_suites/llm/grpo-qwen2.5-32b-32n8g-fsdp2tp8-actckpt-long.v3.sh # Long Gemma3 27b run -tests/test_suites/llm/grpo-gemma3-27b-it-16n8g-fsdp2tp8sp-actckpt-long.sh +tests/test_suites/llm/grpo-gemma3-27b-it-8n8g-fsdp2tp8-actckpt-long.sh + +# Long Megatron Qwen3 30B-A3B run +tests/test_suites/llm/grpo-qwen3-30ba3b-8n8g-megatron.sh + +# DAPO 4h run +tests/test_suites/llm/dapo-qwen2.5-7b.sh + +# Deepseek-V3 on DAPO dataset +tests/test_suites/llm/grpo-dapomath17k-dsv3-megatron.sh ####### # SFT # ####### # Long 8b convergence -tests/test_suites/llm/sft-llama3.1-8b-instruct-1n8g-fsdp2tp1-long.v2.sh +tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-long.sh + +# 300 step 70b convergence +tests/test_suites/llm/sft-llama3.1-70b-8n8g-tp4pp2-long-megatron.sh ####### # DPO # ####### # Long 8b convergence -tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp1.v2.sh -tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.sh +tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp4.sh +tests/test_suites/llm/dpo-llama3.1-8b-instruct-4n8g-megatron.v2.sh +tests/test_suites/llm/dpo-llama3.1-8b-tulu3-1n8g-fsdp2tp1.sh + +################ +# Distillation # +################ + +# Long 4b convergence +tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-long.v1.sh + +# 20 step functional tests on dynamic batching, non-colocated and seqence packing features +tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-1n8g-fsdp2tp2-dynamicbatch.v1.sh +tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp8-noncolocated.v1.sh +tests/test_suites/llm/distillation-qwen3-32b-to-4b-base-2n8g-fsdp2tp2-seqpack.v1.sh diff --git a/tests/test_suites/release_performance.txt 
b/tests/test_suites/release_performance.txt deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/test_suites/vlm/common.env b/tests/test_suites/vlm/common.env new file mode 120000 index 0000000000..ec5d3dc65c --- /dev/null +++ b/tests/test_suites/vlm/common.env @@ -0,0 +1 @@ +../llm/common.env \ No newline at end of file diff --git a/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.sh b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.sh new file mode 100755 index 0000000000..1a5bc2deea --- /dev/null +++ b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-dtensor2tp1.v1.sh @@ -0,0 +1,39 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=200 +MAX_STEPS=200 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_vlm_grpo.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/reward"]["200"] > 0.9' +fi + diff --git a/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.sh 
b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.sh new file mode 100755 index 0000000000..b3c6764f65 --- /dev/null +++ b/tests/test_suites/vlm/vlm_grpo-qwen2.5-vl-3b-instruct-clevr-1n2g-megatrontp2.v1.sh @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== +NUM_NODES=1 +STEPS_PER_RUN=200 +MAX_STEPS=200 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_vlm_grpo.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["200"] < 0.1' \ + 'data["train/reward"]["200"] > 0.9' +fi + diff --git a/tests/test_suites/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.sh.disabled b/tests/test_suites/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.sh.disabled new file mode 100755 index 0000000000..9d8e4a555e --- /dev/null +++ b/tests/test_suites/vlm/vlm_grpo-smolvlm2-2.2b-instruct-clevr-1n2g-dtensor2tp1.v2.sh.disabled @@ -0,0 +1,40 @@ +#!/bin/bash +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd) +source $SCRIPT_DIR/common.env + +# ===== BEGIN CONFIG ===== 
+NUM_NODES=1 +STEPS_PER_RUN=130 +MAX_STEPS=130 +NUM_RUNS=$(( (MAX_STEPS + STEPS_PER_RUN - 1) / STEPS_PER_RUN )) # Round up +NUM_MINUTES=180 +# ===== END CONFIG ===== + +exit_if_max_steps_reached + +# Run the experiment +cd $PROJECT_ROOT +uv run examples/run_vlm_grpo.py \ + --config $CONFIG_PATH \ + grpo.max_num_steps=$MAX_STEPS \ + logger.log_dir=$LOG_DIR \ + logger.wandb_enabled=True \ + logger.wandb.project=nemo-rl \ + logger.wandb.name=$EXP_NAME \ + logger.monitor_gpus=True \ + logger.tensorboard_enabled=True \ + checkpointing.enabled=True \ + checkpointing.checkpoint_dir=$CKPT_DIR \ + $@ \ + 2>&1 | tee $RUN_LOG + +# Convert tensorboard logs to json +uv run tests/json_dump_tb_logs.py $LOG_DIR --output_path $JSON_METRICS + +# Only run metrics if the target step is reached +if [[ $(jq 'to_entries | .[] | select(.key == "train/loss") | .value | keys | map(tonumber) | max' $JSON_METRICS) -ge $MAX_STEPS ]]; then + uv run tests/check_metrics.py $JSON_METRICS \ + 'data["train/loss"]["130"] < 0.1' \ + 'mean(data["train/reward"], -6, -1) > 0.6' # less performant than qwen +fi + diff --git a/tests/unit/L0_Unit_Tests_Generation.sh b/tests/unit/L0_Unit_Tests_Generation.sh new file mode 100644 index 0000000000..e7b7a6e2ca --- /dev/null +++ b/tests/unit/L0_Unit_Tests_Generation.sh @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash +set -xeuo pipefail # Exit immediately if a command exits with a non-zero status + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..) + +cd ${PROJECT_ROOT} +uv run tests/unit/prepare_unit_test_assets.py +uv run --no-sync bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated + +# Check and run mcore tests +exit_code=$(uv run --extra mcore pytest tests/unit/models/generation/ --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No mcore tests to run" +else + uv run --extra mcore bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only +fi + +# Check and run automodel tests +exit_code=$(uv run --extra automodel pytest tests/unit/models/generation/ --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No automodel tests to run" +else + uv run --extra automodel bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only +fi + +# Check and run vllm tests +exit_code=$(uv run --extra vllm pytest tests/unit/models/generation/ --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No vllm tests to run" +else + uv run --extra vllm bash -x ./tests/run_unit.sh unit/models/generation/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only +fi diff --git a/tests/unit/L0_Unit_Tests_Other.sh b/tests/unit/L0_Unit_Tests_Other.sh new file mode 100644 index 0000000000..3908183305 --- /dev/null +++ b/tests/unit/L0_Unit_Tests_Other.sh @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +set -xeuo pipefail # Exit immediately if a command exits with a non-zero status + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..) + +cd ${PROJECT_ROOT} +uv run tests/unit/prepare_unit_test_assets.py +uv run --no-sync bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated + +# Check and run mcore tests +exit_code=$(uv run --extra mcore pytest tests/unit/ --ignore=tests/unit/models/generation/ --ignore=tests/unit/models/policy/ --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No mcore tests to run" +else + uv run --extra mcore bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only +fi + +# Check and run automodel tests +exit_code=$(uv run --extra automodel pytest tests/unit/ --ignore=tests/unit/models/generation/ --ignore=tests/unit/models/policy/ --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?) 
+if [[ $exit_code -eq 5 ]]; then + echo "No automodel tests to run" +else + uv run --extra automodel bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only +fi + +# Check and run vllm tests +exit_code=$(uv run --extra vllm pytest tests/unit/ --ignore=tests/unit/models/generation/ --ignore=tests/unit/models/policy/ --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No vllm tests to run" +else + uv run --extra vllm bash -x ./tests/run_unit.sh unit/ --ignore=unit/models/generation/ --ignore=unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only +fi diff --git a/tests/unit/L0_Unit_Tests_Policy.sh b/tests/unit/L0_Unit_Tests_Policy.sh new file mode 100644 index 0000000000..bae1178c72 --- /dev/null +++ b/tests/unit/L0_Unit_Tests_Policy.sh @@ -0,0 +1,47 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash +set -xeuo pipefail # Exit immediately if a command exits with a non-zero status + +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) +PROJECT_ROOT=$(realpath ${SCRIPT_DIR}/../..) 
+ +cd ${PROJECT_ROOT} +uv run tests/unit/prepare_unit_test_assets.py +uv run --no-sync bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-report=term-missing --cov-report=json --hf-gated + +# Check and run mcore tests +exit_code=$(uv run --extra mcore pytest tests/unit/models/policy/ --collect-only --hf-gated --mcore-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No mcore tests to run" +else + uv run --extra mcore bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --mcore-only +fi + +# Check and run automodel tests +exit_code=$(uv run --extra automodel pytest tests/unit/models/policy/ --collect-only --hf-gated --automodel-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No automodel tests to run" +else + uv run --extra automodel bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --automodel-only +fi + +# Check and run vllm tests +exit_code=$(uv run --extra vllm pytest tests/unit/models/policy/ --collect-only --hf-gated --vllm-only -q >/dev/null 2>&1; echo $?) +if [[ $exit_code -eq 5 ]]; then + echo "No vllm tests to run" +else + uv run --extra vllm bash -x ./tests/run_unit.sh unit/models/policy/ --cov=nemo_rl --cov-append --cov-report=term-missing --cov-report=json --hf-gated --vllm-only +fi diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py index 341a77c5bc..31c1220368 100644 --- a/tests/unit/__init__.py +++ b/tests/unit/__init__.py @@ -11,3 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + + +""" +Auto-loading remote_select plugin here: +- Ensures the plugin is discovered without extra CLI flags or global config. 
+- Loads early in pytest’s startup so ``pytest_load_initial_conftests`` can + rewrite args before other plugins (e.g., testmon) prune collection. +- Scopes behavior to unit tests only (does not affect functional tests). +- Avoids a top-level ``conftest.py`` that would apply repo-wide. +""" + +pytest_plugins = ["tests.unit._plugins.remote_select"] diff --git a/tests/unit/_plugins/remote_select.py b/tests/unit/_plugins/remote_select.py new file mode 100644 index 0000000000..a3f21c136a --- /dev/null +++ b/tests/unit/_plugins/remote_select.py @@ -0,0 +1,284 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Remote-aware test selection helper for pytest-testmon (Python 3.12). + +Purpose +------- +When running unit tests with ``--testmon``, pytest-testmon tracks in-process +Python execution and reruns only affected tests. Code executed inside +``@ray.remote`` actors runs out-of-process, so testmon alone cannot see those +dependencies. This lightweight test-only plugin augments selection so that +edits inside remote actors can still retrigger the relevant tests. + +How it works +------------ +- Builds a static mapping from each unit test (nodeid) to the transitive set + of ``nemo_rl`` Python files that the test module imports. +- Stores the mapping in ``.nrl_remote_map.json`` and tracks mtimes in + ``.nrl_remote_state.json`` at repo root. 
+- When ``--testmon`` is present: + - On first run, seeds the state file and does not change selection. + - On subsequent runs, compares mtimes; if tracked files changed, it replaces + the pytest positional args with the affected nodeids so those tests run. +- Honors ``-k``. If a ``-k`` filter is provided, the plugin does not alter + selection and lets user intent win. + +Limitations +----------- +- Static import analysis only; dynamic imports/loading are not discovered. +- Only Python files are considered (YAML/JSON/shell edits are not tracked). +- The mapping is conservative; if a test exercises code not visible via + imports, run it once explicitly to seed the map. + +Activation +---------- +This plugin auto-loads via ``tests/unit/__init__.py`` and only engages when +``--testmon`` is present. + +Artifacts +--------- +Two JSON files are written to the repository root: + +1) ``.nrl_remote_map.json`` + - Maps test nodeids to the transitive set of project files (under ``nemo_rl/``) + imported by that test module. + - Example (paths abbreviated for readability): + { + "tests/unit/distributed/test_worker_groups.py::test_configure_worker_interaction": [ + "/workspaces/nemo-rl/nemo_rl/distributed/worker_groups.py", + "/workspaces/nemo-rl/nemo_rl/distributed/virtual_cluster.py" + ], + "tests/unit/models/policy/test_dtensor_worker.py::test_lm_policy_init[True]": [ + "/workspaces/nemo-rl/nemo_rl/models/policy/dtensor_policy_worker.py" + ] + } + +2) ``.nrl_remote_state.json`` + - Stores the last-seen modification time (mtime) per tracked file to detect changes. 
+ - Example: + { + "/workspaces/nemo-rl/nemo_rl/distributed/worker_groups.py": 1725369123.456, + "/workspaces/nemo-rl/nemo_rl/models/policy/dtensor_policy_worker.py": 1725369187.012 + } +""" + +import ast +import json +import os +import sys +from pathlib import Path +from typing import Iterable + +REPO_ROOT: Path = Path(__file__).resolve().parents[3] +MAP_PATH: Path = REPO_ROOT / ".nrl_remote_map.json" +STATE_PATH: Path = REPO_ROOT / ".nrl_remote_state.json" +PROJECT_PREFIXES: tuple[str, ...] = ("nemo_rl",) + + +def _read_text(path: Path) -> str: + try: + return path.read_text() + except Exception: + return "" + + +def _parse_imported_modules(py_path: Path) -> set[str]: + src = _read_text(py_path) + try: + tree = ast.parse(src) + except Exception: + return set() + modules: set[str] = set() + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + modules.add(alias.name) + elif isinstance(node, ast.ImportFrom): + if node.module: + modules.add(node.module) + return {m for m in modules if m.startswith(PROJECT_PREFIXES)} + + +def _module_to_file(module_name: str) -> Path | None: + mod_path = Path(module_name.replace(".", "/") + ".py") + abs_path = (REPO_ROOT / mod_path).resolve() + return abs_path if abs_path.exists() else None + + +def _discover_test_nodeids_and_files() -> dict[str, set[str]]: + mapping: dict[str, set[str]] = {} + tests_root = REPO_ROOT / "tests" / "unit" + for test_path in tests_root.rglob("test_*.py"): + rel = test_path.relative_to(REPO_ROOT) + mod_node_prefix = str(rel) + modules = _parse_imported_modules(test_path) + files: set[str] = set() + for m in modules: + f = _module_to_file(m) + if f: + files.add(str(f)) + if not files: + continue + src = _read_text(test_path) + try: + tree = ast.parse(src) + except Exception: + continue + for node in tree.body: + if isinstance(node, ast.FunctionDef) and node.name.startswith("test_"): + nodeid = f"{mod_node_prefix}::{node.name}" + mapping[nodeid] = set(files) + elif 
isinstance(node, ast.ClassDef) and node.name.startswith("Test"): + for sub in node.body: + if isinstance(sub, ast.FunctionDef) and sub.name.startswith( + "test_" + ): + nodeid = f"{mod_node_prefix}::{node.name}::{sub.name}" + mapping[nodeid] = set(files) + return mapping + + +def _load_mapping() -> dict[str, set[str]]: + if not MAP_PATH.exists(): + return {} + try: + data = json.loads(MAP_PATH.read_text()) + return {k: set(v) for k, v in data.items()} + except Exception: + return {} + + +def _save_mapping(mapping: dict[str, set[str]]) -> None: + MAP_PATH.write_text( + json.dumps({k: sorted(v) for k, v in mapping.items()}, indent=2) + ) + + +def _detect_changed(files: Iterable[str]) -> set[str]: + prev: dict[str, float] = {} + if STATE_PATH.exists(): + try: + prev = json.loads(STATE_PATH.read_text()) + except Exception: + prev = {} + changed: set[str] = set() + state: dict[str, float] = {} + for f in files: + try: + mtime = os.path.getmtime(f) + state[f] = mtime + if prev.get(f, 0) < mtime: + changed.add(f) + except FileNotFoundError: + changed.add(f) + if files: + STATE_PATH.write_text(json.dumps(state, indent=2)) + return changed + + +def _has_k_filter(args: list[str]) -> bool: + """Return True if -k/--keyword filter is present in CLI args.""" + if "-k" in args: + return True + for i, a in enumerate(args): + if a.startswith("-k") or a.startswith("--keyword"): + return True + if a in {"-k", "--keyword"} and i + 1 < len(args): + return True + return False + + +def pytest_load_initial_conftests(args, early_config, parser): + # Only augment when user asked for --testmon and no -k filter is provided + if "--testmon" not in args or _has_k_filter(args): + return + + affected = _select_affected(None) + # None = first run (seed only), empty set = no changes; leave args unchanged + if affected is None or affected == set(): + return + + # Remove --testmon and narrow args to affected nodeids (execute only those tests) + while "--testmon" in args: + args.remove("--testmon") + 
if not any(not a.startswith("-") for a in args): + args[:] = sorted(affected) + else: + args.extend(sorted(affected)) + + +def _effective_mapping() -> dict[str, set[str]]: + mapping = _load_mapping() + if not mapping: + mapping = _discover_test_nodeids_and_files() + if mapping: + _save_mapping(mapping) + return mapping + + +def _select_affected(config) -> set[str] | None: + mapping = _effective_mapping() + if not mapping: + return None + file_set: set[str] = set() + for files in mapping.values(): + file_set.update(files) + if not file_set: + return None + if not STATE_PATH.exists(): + _ = _detect_changed(file_set) + return None + changed = _detect_changed(file_set) + if not changed: + return set() + affected: set[str] = set() + for nodeid, files in mapping.items(): + if any(f in changed for f in files): + affected.add(nodeid) + return affected + + +def pytest_configure(config) -> None: + # Late-stage fallback in case initial hook didn't capture + tm_on = config.pluginmanager.hasplugin("testmon") or "--testmon" in sys.argv + if not tm_on: + return + # Honor -k/--keyword filters + if _has_k_filter(sys.argv): + return + affected = _select_affected(config) + if affected is None or affected == set(): + return + try: + config.args[:] = sorted(affected) + except Exception: + pass + + +def pytest_collection_modifyitems(config, items): + tm_on = config.pluginmanager.hasplugin("testmon") or "--testmon" in sys.argv + if not tm_on: + return + # Honor -k/--keyword filters + if _has_k_filter(sys.argv): + return + affected = _select_affected(config) + if affected is None: + return + if affected == set(): + # No changes → deselect all for speed + items[:] = [] + return + items[:] = [it for it in items if it.nodeid in affected] diff --git a/tests/unit/algorithms/test_async_utils.py b/tests/unit/algorithms/test_async_utils.py new file mode 100644 index 0000000000..c8eb1639bd --- /dev/null +++ b/tests/unit/algorithms/test_async_utils.py @@ -0,0 +1,700 @@ +# Copyright (c) 2025, NVIDIA 
CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +import threading +import unittest.mock as mock + +import pytest +import ray +import torch + +# Set up Ray temp directory before any Ray operations +# Try multiple approaches to ensure Ray uses a writable directory +_temp_dir = tempfile.mkdtemp(prefix="ray_async_test_") +os.environ["RAY_TEMP_DIR"] = _temp_dir +os.environ["RAY_TMPDIR"] = _temp_dir # Alternative env var +os.environ["TMPDIR"] = _temp_dir # System temp dir + +from nemo_rl.algorithms.async_utils import AsyncTrajectoryCollector, ReplayBuffer +from nemo_rl.algorithms.grpo import MasterConfig +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.environments.interfaces import ( + EnvironmentInterface, + EnvironmentReturn, +) + + +@ray.remote(num_cpus=0) +class MockEnvironment(EnvironmentInterface): + """Mock environment for testing async utilities.""" + + def __init__(self, rewards: list[float]): + self.rewards = rewards + self._calls = 0 + + def step( + self, messages: list[LLMMessageLogType], env_info: list[dict] + ) -> EnvironmentReturn: + self._calls += 1 + return ( + [{"role": "environment", "content": "observation"}] * len(messages), + [{}] * len(messages), + [[]] * len(messages), + self.rewards, + [True] * len(messages), + [None] * len(messages), + ) + + def get_calls(self): + return self._calls + + 
def reset_calls(self): + self._calls = 0 + return True + + def global_post_process_and_metrics( + self, batch: BatchedDataDict + ) -> tuple[BatchedDataDict, dict]: + return batch, {} + + +class MockGenerationInterface: + """Mock generation interface for testing.""" + + def __init__(self): + self.prepare_calls = 0 + self.finish_calls = 0 + + def prepare_for_generation(self, **kwargs): + self.prepare_calls += 1 + + def finish_generation(self): + self.finish_calls += 1 + + +class TestReplayBuffer: + """Test cases for ReplayBuffer.""" + + def test_replay_buffer_initialization(self): + """Test ReplayBuffer initialization.""" + buffer = ReplayBuffer.remote(max_size=10) + size = ray.get(buffer.size.remote()) + assert size == 0 + + debug_info = ray.get(buffer.get_debug_info.remote()) + assert debug_info["total_trajectories"] == 0 + assert debug_info["max_size"] == 10 + assert debug_info["trajectory_versions"] == [] + assert debug_info["target_weight_versions"] == [] + + ray.kill(buffer) + + def test_replay_buffer_push_and_size(self): + """Test pushing trajectories to buffer.""" + buffer = ReplayBuffer.remote(max_size=3) + + # Create mock trajectories + trajectory1 = {"batch": {"data": "test1"}, "rollout_metrics": {"reward": 1.0}} + trajectory2 = {"batch": {"data": "test2"}, "rollout_metrics": {"reward": 2.0}} + + # Push trajectories + status1 = ray.get( + buffer.push_with_wait_signal.remote( + trajectory1, weight_version=0, target_weight_version=1 + ) + ) + assert status1 == "success" + + status2 = ray.get( + buffer.push_with_wait_signal.remote( + trajectory2, weight_version=1, target_weight_version=2 + ) + ) + assert status2 == "success" + + # Check size + size = ray.get(buffer.size.remote()) + assert size == 2 + + # Check debug info + debug_info = ray.get(buffer.get_debug_info.remote()) + assert debug_info["total_trajectories"] == 2 + assert debug_info["trajectory_versions"] == [0, 1] + assert debug_info["target_weight_versions"] == [1, 2] + + ray.kill(buffer) + + def 
test_replay_buffer_max_size_limit(self): + """Test that buffer respects max size limit.""" + buffer = ReplayBuffer.remote(max_size=2) + + # Fill buffer to capacity + trajectory1 = {"batch": {"data": "test1"}, "rollout_metrics": {"reward": 1.0}} + trajectory2 = {"batch": {"data": "test2"}, "rollout_metrics": {"reward": 2.0}} + trajectory3 = {"batch": {"data": "test3"}, "rollout_metrics": {"reward": 3.0}} + + # Push first two trajectories + status1 = ray.get( + buffer.push_with_wait_signal.remote( + trajectory1, weight_version=0, target_weight_version=1 + ) + ) + status2 = ray.get( + buffer.push_with_wait_signal.remote( + trajectory2, weight_version=1, target_weight_version=2 + ) + ) + assert status1 == "success" + assert status2 == "success" + + # Try to push third trajectory (should return "full") + status3 = ray.get( + buffer.push_with_wait_signal.remote( + trajectory3, weight_version=2, target_weight_version=3 + ) + ) + assert status3 == "full" + + # Size should still be 2 + size = ray.get(buffer.size.remote()) + assert size == 2 + + ray.kill(buffer) + + def test_replay_buffer_sampling_basic(self): + """Test basic trajectory sampling.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Push trajectories with different weight versions + trajectories = [] + for i in range(3): + trajectory = { + "batch": {"data": f"test{i}"}, + "rollout_metrics": {"reward": float(i)}, + } + trajectories.append(trajectory) + ray.get( + buffer.push_with_wait_signal.remote( + trajectory, weight_version=i, target_weight_version=i + 1 + ) + ) + + # Sample trajectories intended for current step 2 + sample_result = ray.get( + buffer.sample.remote( + num_prompt_groups=1, + current_weight_version=2, + max_age_steps=2, + ) + ) + + assert sample_result is not None + assert len(sample_result["trajectories"]) == 1 + assert "avg_trajectory_age" in sample_result + + # The trajectory should be intended for step 2 (target_weight_version=2) + # But we pushed with target_weight_version=i+1, so 
trajectory at i=1 has target=2 + sampled_trajectory = sample_result["trajectories"][0] + assert sampled_trajectory["batch"]["data"] == "test1" + + ray.kill(buffer) + + def test_replay_buffer_sampling_insufficient_trajectories(self): + """Test sampling when insufficient trajectories are available.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Push only one trajectory + trajectory = {"batch": {"data": "test"}, "rollout_metrics": {"reward": 1.0}} + ray.get( + buffer.push_with_wait_signal.remote( + trajectory, weight_version=0, target_weight_version=1 + ) + ) + + # Try to sample more trajectories than available for current step + sample_result = ray.get( + buffer.sample.remote( + num_prompt_groups=2, # Request 2 but only 1 available + current_weight_version=1, + max_age_steps=1, + ) + ) + + assert sample_result is None # Should return None when insufficient + + ray.kill(buffer) + + def test_replay_buffer_age_filtering(self): + """Test that old trajectories are filtered out.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Push trajectories with different ages + old_trajectory = {"batch": {"data": "old"}, "rollout_metrics": {"reward": 1.0}} + recent_trajectory = { + "batch": {"data": "recent"}, + "rollout_metrics": {"reward": 2.0}, + } + + ray.get( + buffer.push_with_wait_signal.remote( + old_trajectory, weight_version=0, target_weight_version=1 + ) + ) + ray.get( + buffer.push_with_wait_signal.remote( + recent_trajectory, weight_version=2, target_weight_version=3 + ) + ) + + # Sample with current_weight_version=3 and max_age_steps=1 + # This should filter out the trajectory with weight_version=0 (too old) + with pytest.raises( + ValueError, match="Found .* trajectories older than min_valid_version" + ): + ray.get( + buffer.sample.remote( + num_prompt_groups=1, + current_weight_version=3, + max_age_steps=1, + ) + ) + + ray.kill(buffer) + + def test_replay_buffer_target_weight_matching(self): + """Test that sampling only returns trajectories intended for 
current step.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Push trajectories intended for different target steps + trajectory1 = { + "batch": {"data": "for_step_1"}, + "rollout_metrics": {"reward": 1.0}, + } + trajectory2 = { + "batch": {"data": "for_step_2"}, + "rollout_metrics": {"reward": 2.0}, + } + + ray.get( + buffer.push_with_wait_signal.remote( + trajectory1, weight_version=0, target_weight_version=1 + ) + ) + ray.get( + buffer.push_with_wait_signal.remote( + trajectory2, weight_version=1, target_weight_version=2 + ) + ) + + # Sample for current step 1 - should only get trajectory intended for step 1 + sample_result = ray.get( + buffer.sample.remote( + num_prompt_groups=1, + current_weight_version=1, + max_age_steps=2, + ) + ) + + assert sample_result is not None + assert len(sample_result["trajectories"]) == 1 + assert sample_result["trajectories"][0]["batch"]["data"] == "for_step_1" + + ray.kill(buffer) + + def test_replay_buffer_get_existing_target_weights(self): + """Test getting existing target weight versions.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Initially empty + existing_weights = ray.get(buffer.get_existing_target_weights.remote()) + assert existing_weights == set() + + # Push trajectories with different target weights + trajectory1 = {"batch": {"data": "test1"}, "rollout_metrics": {"reward": 1.0}} + trajectory2 = {"batch": {"data": "test2"}, "rollout_metrics": {"reward": 2.0}} + + ray.get( + buffer.push_with_wait_signal.remote( + trajectory1, weight_version=0, target_weight_version=1 + ) + ) + ray.get( + buffer.push_with_wait_signal.remote( + trajectory2, weight_version=1, target_weight_version=3 + ) + ) + + existing_weights = ray.get(buffer.get_existing_target_weights.remote()) + assert existing_weights == {1, 3} + + ray.kill(buffer) + + def test_replay_buffer_clear(self): + """Test clearing the buffer.""" + buffer = ReplayBuffer.remote(max_size=10) + + # Push some trajectories + trajectory = {"batch": {"data": "test"}, 
"rollout_metrics": {"reward": 1.0}} + ray.get( + buffer.push_with_wait_signal.remote( + trajectory, weight_version=0, target_weight_version=1 + ) + ) + + # Verify buffer has content + size = ray.get(buffer.size.remote()) + assert size == 1 + + # Clear buffer + ray.get(buffer.clear.remote()) + + # Verify buffer is empty + size = ray.get(buffer.size.remote()) + assert size == 0 + + debug_info = ray.get(buffer.get_debug_info.remote()) + assert debug_info["total_trajectories"] == 0 + assert debug_info["trajectory_versions"] == [] + assert debug_info["target_weight_versions"] == [] + + ray.kill(buffer) + + +class TestAsyncTrajectoryCollector: + """Test cases for AsyncTrajectoryCollector.""" + + def create_mock_config(self) -> MasterConfig: + """Create a mock master config for testing.""" + return { + "grpo": { + "num_prompts_per_step": 2, + "num_generations_per_prompt": 3, + "max_rollout_turns": 1, + "async_grpo": {"max_trajectory_age_steps": 2}, + }, + "policy": {"max_total_sequence_length": 512}, + } + + def create_mock_batch(self, size: int = 2) -> BatchedDataDict[DatumSpec]: + """Create a mock batch for testing.""" + message_logs = [] + for i in range(size): + message_logs.append( + [ + {"role": "user", "content": f"Test prompt {i}"}, + ] + ) + + return BatchedDataDict[DatumSpec]( + { + "task_name": ["test"] * size, + "message_log": message_logs, + "extra_env_info": [{}] * size, + "loss_multiplier": torch.ones(size), + } + ) + + def test_async_trajectory_collector_initialization(self): + """Test AsyncTrajectoryCollector initialization.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + 
replay_buffer=buffer, + start_step=0, + ) + + # Test basic functionality + weight_version = ray.get(collector.get_weight_version.remote()) + assert weight_version == 0 + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_async_trajectory_collector_weight_version_updates(self): + """Test weight version updates in trajectory collector.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Update weight version + ray.get(collector.set_weight_version.remote(5)) + weight_version = ray.get(collector.get_weight_version.remote()) + assert weight_version == 5 + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_async_trajectory_collector_pause_resume(self): + """Test pause and resume functionality.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Test pause and resume (these should not raise errors) + ray.get(collector.pause.remote()) + ray.get(collector.resume.remote()) + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_async_trajectory_collector_prepare_for_refit(self): + """Test prepare for refit functionality.""" + buffer = 
ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Test prepare for refit (should complete without hanging) + ray.get(collector.prepare_for_refit.remote()) + ray.get(collector.resume_after_refit.remote()) + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_calculate_target_weights(self): + """Test target weight calculation logic.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Test target weight calculation with different scenarios + # Note: We can't directly test the private method, but we can test its effects + # through the public interface behavior + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_dataloader_state_retrieval(self): + """Test getting dataloader state for checkpointing.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + 
task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Test getting dataloader state (should return empty dict when no dataloader) + state = ray.get(collector.get_dataloader_state.remote()) + assert isinstance(state, dict) + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + +class TestAsyncUtilsIntegration: + """Integration tests for async utilities working together.""" + + def create_mock_config(self) -> MasterConfig: + """Create a mock master config for testing.""" + return { + "grpo": { + "num_prompts_per_step": 2, + "num_generations_per_prompt": 2, + "max_rollout_turns": 1, + "async_grpo": {"max_trajectory_age_steps": 1}, + }, + "policy": {"max_total_sequence_length": 512}, + } + + def create_mock_batch(self, size: int = 2) -> BatchedDataDict[DatumSpec]: + """Create a mock batch for testing.""" + message_logs = [] + for i in range(size): + message_logs.append( + [ + {"role": "user", "content": f"Test prompt {i}"}, + ] + ) + + return BatchedDataDict[DatumSpec]( + { + "task_name": ["test"] * size, + "message_log": message_logs, + "extra_env_info": [{}] * size, + "loss_multiplier": torch.ones(size), + } + ) + + def test_buffer_and_collector_integration(self): + """Test that buffer and collector work together correctly.""" + buffer = ReplayBuffer.remote(max_size=10) + mock_generation = MockGenerationInterface() + mock_tokenizer = mock.MagicMock() + mock_env = MockEnvironment.remote(rewards=[1.0, 2.0]) + task_to_env = {"test": mock_env} + master_config = self.create_mock_config() + + collector = AsyncTrajectoryCollector.remote( + policy_generation=mock_generation, + tokenizer=mock_tokenizer, + task_to_env=task_to_env, + master_config=master_config, + replay_buffer=buffer, + start_step=0, + ) + + # Verify initial state + buffer_size = ray.get(buffer.size.remote()) + assert buffer_size == 0 + + weight_version = ray.get(collector.get_weight_version.remote()) + assert weight_version == 0 + + # Test 
weight version synchronization + ray.get(collector.set_weight_version.remote(3)) + updated_version = ray.get(collector.get_weight_version.remote()) + assert updated_version == 3 + + ray.kill(collector) + ray.kill(buffer) + ray.kill(mock_env) + + def test_concurrent_operations(self): + """Test that concurrent operations don't cause race conditions.""" + buffer = ReplayBuffer.remote(max_size=5) + + # Push trajectories concurrently from multiple threads + def push_trajectory(buffer, trajectory_id): + trajectory = { + "batch": {"data": f"test{trajectory_id}"}, + "rollout_metrics": {"reward": float(trajectory_id)}, + } + return ray.get( + buffer.push_with_wait_signal.remote( + trajectory, + weight_version=trajectory_id, + target_weight_version=trajectory_id + 1, + ) + ) + + # Use threading to simulate concurrent pushes + threads = [] + results = [] + + def worker(traj_id): + result = push_trajectory(buffer, traj_id) + results.append(result) + + for i in range(3): + thread = threading.Thread(target=worker, args=(i,)) + threads.append(thread) + thread.start() + + for thread in threads: + thread.join() + + # All pushes should succeed + assert all(result == "success" for result in results) + + # Buffer should have correct size + final_size = ray.get(buffer.size.remote()) + assert final_size == 3 + + ray.kill(buffer) + + def test_error_handling(self): + """Test error handling in async utilities.""" + # Test with invalid buffer size + with pytest.raises(Exception): + buffer = ReplayBuffer.remote(max_size=-1) + ray.get(buffer.size.remote()) + + # Test buffer operations + buffer = ReplayBuffer.remote(max_size=1) + + # Test sampling from empty buffer + sample_result = ray.get( + buffer.sample.remote( + num_prompt_groups=1, + current_weight_version=0, + max_age_steps=1, + ) + ) + assert sample_result is None + + ray.kill(buffer) diff --git a/tests/unit/algorithms/test_distillation.py b/tests/unit/algorithms/test_distillation.py new file mode 100644 index 0000000000..ec8ed37f63 
--- /dev/null +++ b/tests/unit/algorithms/test_distillation.py @@ -0,0 +1,653 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest.mock import MagicMock, patch + +import pytest +import torch +from torchdata.stateful_dataloader import StatefulDataLoader + +import nemo_rl.algorithms.distillation as distil_mod +from nemo_rl.algorithms.distillation import ( + _default_distillation_save_state, + check_vocab_equality, + distillation_train, + validate, +) +from nemo_rl.algorithms.loss_functions import DistillationLossFn +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +@pytest.fixture +def mock_components(): + # Create mock components + student_policy = MagicMock() + student_policy.train.return_value = { + "loss": torch.tensor(0.5), + "grad_norm": torch.tensor(1.0), + "all_mb_metrics": {"global_valid_toks": [10]}, + } + # Add generate method since student_generation will be set to student_policy + student_policy.generate.return_value = { + "output_ids": torch.randint(0, 8, (2, 10)), + "generation_lengths": torch.tensor([5, 7]), + "unpadded_sequence_lengths": torch.tensor([8, 10]), + "logprobs": torch.randn(2, 10, 8), + } + + teacher_policy = MagicMock() + teacher_policy.get_topk_logits.return_value = { + "topk_logits": torch.randn(2, 10, 64), + "topk_indices": torch.randint(0, 8, (2, 10, 64)), + } + + # Set student_generation to 
None to avoid Ray-related refit issues + # This makes NEED_REFIT = False, so refit_policy_generation won't be called + student_generation = None + + # Create a proper message log structure with token_ids (similar to SFT) + # Use BatchedDataDict instead of regular dict to support repeat_interleave + mock_batch = BatchedDataDict[DatumSpec]( + { + "message_log": [ + [ + { + "token_ids": torch.tensor([1, 2, 3]), + "role": "user", + "content": "What is 1+1?", + }, + { + "token_ids": torch.tensor([4, 5, 6]), + "role": "assistant", + "content": "The answer is 2.", + }, + ] + ], + "loss_multiplier": torch.tensor( + [1.0] + ), # Make it 1D tensor for batch dimension + "task_name": ["math"], + "extra_env_info": [{}], + "length": torch.tensor([6]), # Make it 1D tensor for batch dimension + "idx": torch.tensor([0]), # Make it 1D tensor for batch dimension + } + ) + + # Create mock dataloader with 10 batches that can be iterated multiple times + train_dataloader = MagicMock(spec=StatefulDataLoader) + + def train_iter(self): + return iter([mock_batch] * 10) + + train_dataloader.__iter__ = train_iter + train_dataloader.__len__ = MagicMock(return_value=10) + + val_dataloader = MagicMock(spec=StatefulDataLoader) + + def val_iter(self): + return iter([mock_batch] * 10) + + val_dataloader.__iter__ = val_iter + val_dataloader.__len__ = MagicMock(return_value=10) + + tokenizer = MagicMock() + tokenizer.pad_token_id = 0 + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + logger = MagicMock() + checkpointer = MagicMock() + + # Create mock environments + task_to_env = {"math": MagicMock()} + val_task_to_env = {"math": MagicMock()} + + # Create mock master config + master_config = { + "distillation": { + "max_num_steps": 5, + "max_num_epochs": 10, + "val_period": 100, + "val_batch_size": 1, + "val_at_start": False, + "max_val_samples": 10, + "topk_logits_k": 64, + "num_prompts_per_step": 1, + 
"num_generations_per_prompt": 1, + "max_rollout_turns": 0, # No environment interaction needed for distillation + "seed": 42, + }, + "policy": { + "train_global_batch_size": 1, + "make_sequence_length_divisible_by": 8, + "max_total_sequence_length": 2048, + "generation": { + "colocated": { + "enabled": False, + }, + }, + }, + "teacher": { + "model_name": "test-teacher", + }, + "loss_fn": { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + }, + "data": { + "dataset_name": "test_dataset", + }, + "logger": { + "num_val_samples_to_print": 5, + }, + "cluster": { + "num_nodes": 1, + "gpus_per_node": 2, + }, + "checkpointing": { + "enabled": False, + "checkpoint_must_save_by": None, + "save_period": 10, + "metric_name": None, + }, + } + + return { + "student_policy": student_policy, + "teacher_policy": teacher_policy, + "student_generation": student_generation, + "train_dataloader": train_dataloader, + "val_dataloader": val_dataloader, + "tokenizer": tokenizer, + "loss_fn": loss_fn, + "logger": logger, + "checkpointer": checkpointer, + "task_to_env": task_to_env, + "val_task_to_env": val_task_to_env, + "master_config": master_config, + } + + +def test_distillation_train_max_steps(mock_components): + """Test that training terminates correctly when maximum steps are reached.""" + mock_components["master_config"]["distillation"]["max_num_steps"] = 5 + + distillation_save_state = _default_distillation_save_state() + + # Run training + distillation_train( + mock_components["student_policy"], + mock_components["teacher_policy"], + mock_components["student_generation"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["task_to_env"], + mock_components["val_task_to_env"], + mock_components["logger"], + mock_components["checkpointer"], + distillation_save_state, + mock_components["master_config"], + ) + + assert 
mock_components["student_policy"].train.call_count == 5 + + +def test_exit_on_timeout(mock_components, capsys): + """Test that training loop exits when timeout is reached""" + # Set max steps to large number + mock_components["master_config"]["distillation"]["max_num_steps"] = 100 + + distillation_save_state = _default_distillation_save_state() + + # Mock TimeoutChecker to return False for first 7 checks, then True (timeout) + with patch("nemo_rl.algorithms.distillation.TimeoutChecker") as mock_timeout_class: + mock_timeout_instance = MagicMock() + # Create a side_effect that returns False 7 times, then True + check_results = [False] * 7 + [True] + mock_timeout_instance.check_save.side_effect = check_results + mock_timeout_class.return_value = mock_timeout_instance + + # Run training + distillation_train( + mock_components["student_policy"], + mock_components["teacher_policy"], + mock_components["student_generation"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["task_to_env"], + mock_components["val_task_to_env"], + mock_components["logger"], + mock_components["checkpointer"], + distillation_save_state, + mock_components["master_config"], + ) + + # Verify training stopped at 8 steps (when check_save returned True) + assert mock_components["student_policy"].train.call_count == 8 + + # Verify the timeout message was printed and training actually stopped + captured = capsys.readouterr() + output_lines = captured.out.strip().split("\n") + + # Find the timeout message + timeout_line_idx = None + for i, line in enumerate(output_lines): + if "Timeout has been reached, stopping training early" in line: + timeout_line_idx = i + break + + assert timeout_line_idx is not None, "Timeout message not found in output" + + # For distillation, verify we don't see more step messages after timeout + remaining_lines = output_lines[timeout_line_idx:] + for line in 
remaining_lines: + # Distillation doesn't have epochs, but check for step markers + assert not line.startswith("Step ") or "Step 8" in line, ( + f"Training continued after timeout: {line}" + ) + + +def test_validate_function(mock_components): + """Test independent validation function to ensure validation logic correctness.""" + # Run validation + val_metrics, validation_timings = validate( + mock_components["student_generation"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["val_task_to_env"], + step=0, + master_config=mock_components["master_config"], + ) + + # Verify validation results + assert isinstance(val_metrics, dict) + assert isinstance(validation_timings, dict) + # For distillation, we don't need environment interaction since max_rollout_turns=0 + # The validation focuses on generation and teacher-student knowledge transfer + # Note: validate() function itself doesn't call logger.log_metrics - that's done by the caller + + +def test_check_vocab_equality_pass(monkeypatch): + student_tokenizer = MagicMock() + student_tokenizer.get_vocab.return_value = {"a": 0, "b": 1} + student_tokenizer.__len__.return_value = 2 + + teacher_tokenizer = MagicMock() + teacher_tokenizer.get_vocab.return_value = {"a": 0, "b": 1} + teacher_tokenizer.__len__.return_value = 2 + + student_config = MagicMock() + student_config.vocab_size = 2 + teacher_config = MagicMock() + teacher_config.vocab_size = 2 + + monkeypatch.setattr( + distil_mod.AutoTokenizer, + "from_pretrained", + lambda name: teacher_tokenizer, + ) + monkeypatch.setattr( + distil_mod.AutoConfig, + "from_pretrained", + lambda name: student_config if name == "student-model" else teacher_config, + ) + + # Should not raise + check_vocab_equality(student_tokenizer, "student-model", "teacher-model") + + +def test_check_vocab_equality_vocab_mismatch_raises(monkeypatch): + student_tokenizer = MagicMock() + student_tokenizer.get_vocab.return_value = {"a": 0, "b": 1} + 
student_tokenizer.__len__.return_value = 2 + + teacher_tokenizer = MagicMock() + teacher_tokenizer.get_vocab.return_value = {"a": 0, "c": 2} + teacher_tokenizer.__len__.return_value = 2 + + student_config = MagicMock() + student_config.vocab_size = 2 + teacher_config = MagicMock() + teacher_config.vocab_size = 2 + + monkeypatch.setattr( + distil_mod.AutoTokenizer, + "from_pretrained", + lambda name: teacher_tokenizer, + ) + monkeypatch.setattr( + distil_mod.AutoConfig, + "from_pretrained", + lambda name: student_config if name == "student-model" else teacher_config, + ) + + with pytest.raises(AssertionError): + check_vocab_equality(student_tokenizer, "student-model", "teacher-model") + + +def test_check_vocab_equality_length_mismatch_raises(monkeypatch): + # Same vocab mapping but different __len__ values + vocab = {"a": 0, "b": 1} + student_tokenizer = MagicMock() + student_tokenizer.get_vocab.return_value = vocab + student_tokenizer.__len__.return_value = 2 + + teacher_tokenizer = MagicMock() + teacher_tokenizer.get_vocab.return_value = vocab + teacher_tokenizer.__len__.return_value = 3 + + student_config = MagicMock() + student_config.vocab_size = 2 + teacher_config = MagicMock() + teacher_config.vocab_size = 2 + + monkeypatch.setattr( + distil_mod.AutoTokenizer, + "from_pretrained", + lambda name: teacher_tokenizer, + ) + monkeypatch.setattr( + distil_mod.AutoConfig, + "from_pretrained", + lambda name: student_config if name == "student-model" else teacher_config, + ) + + with pytest.raises(AssertionError): + check_vocab_equality(student_tokenizer, "student-model", "teacher-model") + + +def test_check_vocab_equality_config_vocab_size_mismatch_raises(monkeypatch): + vocab = {"a": 0, "b": 1} + student_tokenizer = MagicMock() + student_tokenizer.get_vocab.return_value = vocab + student_tokenizer.__len__.return_value = 2 + + teacher_tokenizer = MagicMock() + teacher_tokenizer.get_vocab.return_value = vocab + teacher_tokenizer.__len__.return_value = 2 + + 
student_config = MagicMock() + student_config.vocab_size = 2 + teacher_config = MagicMock() + teacher_config.vocab_size = 3 + + monkeypatch.setattr( + distil_mod.AutoTokenizer, + "from_pretrained", + lambda name: teacher_tokenizer, + ) + monkeypatch.setattr( + distil_mod.AutoConfig, + "from_pretrained", + lambda name: student_config if name == "student-model" else teacher_config, + ) + + with pytest.raises(AssertionError): + check_vocab_equality(student_tokenizer, "student-model", "teacher-model") + + +def test_noncolocated_inference_requires_explicit_gpus_per_node_single_node(): + """Test that non-colocated inference requires explicit gpus_per_node when cluster.num_nodes=1.""" + from unittest.mock import MagicMock, patch + + from nemo_rl.algorithms.distillation import setup + + # Create minimal config with non-colocated inference but gpus_per_node=None + master_config = { + "policy": { + "generation": { + "backend": "vllm", + "colocated": { + "enabled": False, # Non-colocated + "resources": { + "gpus_per_node": None, # This should trigger error + "num_nodes": None, + }, + }, + }, + "dtensor_cfg": { + "enabled": False, + }, + }, + "teacher": { + "dtensor_cfg": { + "enabled": False, + }, + }, + "loss_fn": {}, + "distillation": { + "seed": 42, + "topk_logits_k": 64, + "num_prompts_per_step": 1, # Config extraction requires this key + "val_period": 0, # Config extraction requires this key + "val_at_start": False, # Config extraction requires this key + }, + "data": {"shuffle": False}, + "logger": {}, # Config extraction requires this key + "checkpointing": {}, # Config extraction requires this key + "cluster": { + "num_nodes": 1, # Single node + "gpus_per_node": 8, + }, + } + + tokenizer = MagicMock() + dataset = MagicMock() + dataset.__len__ = MagicMock(return_value=10) + + # Mock everything we don't need to test + with ( + patch("nemo_rl.algorithms.distillation.Logger") as mock_logger, + patch("nemo_rl.algorithms.distillation.CheckpointManager") as 
mock_checkpointer, + patch("nemo_rl.algorithms.distillation.StatefulDataLoader"), + pytest.raises( + AssertionError, + match="policy.generation.colocated.resources.gpus_per_node must be explicitly set", + ), + ): + # Configure mocks to skip checkpoint loading + mock_checkpointer.return_value.get_latest_checkpoint_path.return_value = None + setup(master_config, tokenizer, dataset, None) + + +def test_distillation_setup_non_colocated_smoke(monkeypatch): + """Smoke test: calling setup with a non-colocated config should succeed.""" + from unittest.mock import MagicMock, patch + + import nemo_rl.algorithms.distillation as distil_mod + + # Single node cluster; inference uses a subset of GPUs on same node + master_config = { + "policy": { + "generation": { + "backend": "vllm", + "colocated": { + "enabled": False, + "resources": { + "gpus_per_node": 8, # inference on 8 GPU + "num_nodes": 1, + }, + }, + }, + "dtensor_cfg": { + "enabled": False, + }, + "model_name": "test-policy", + }, + "teacher": { + "model_name": "test-teacher", + "dtensor_cfg": { + "enabled": False, + }, + }, + "loss_fn": { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + }, + "distillation": { + "seed": 42, + "topk_logits_k": 64, + "num_prompts_per_step": 1, + "max_num_epochs": 10, + "max_num_steps": 100, + "val_period": 0, + "val_at_start": False, + }, + "data": {"shuffle": False}, + "logger": {}, + "checkpointing": {}, + "cluster": {"num_nodes": 2, "gpus_per_node": 8}, + } + + tokenizer = MagicMock() + dataset = MagicMock() + dataset.__len__ = MagicMock(return_value=1) + + # Skip tokenizer/vocab equality check inside setup + monkeypatch.setenv("NRL_SKIP_DISTILLATION_TOKENIZER_CHECK", "1") + + ip_port = ("127.0.0.1", 12345) + + class DummyCluster: + def __init__(self, *args, **kwargs): + pass + + def world_size(self): + return 1 + + def get_master_address_and_port(self): + return ip_port + + class DummyPolicy: + def __init__(self, *args, **kwargs): + pass + + def 
prepare_refit_info(self): + return {} + + def offload_after_refit(self): + return None + + def init_collective(self, *args, **kwargs): + return [MagicMock()] + + class DummyVllmGeneration: + def __init__(self, *args, **kwargs): + pass + + def finish_generation(self): + return None + + def prepare_refit_info(self, *args, **kwargs): + return None + + def init_collective(self, *args, **kwargs): + return [MagicMock()] + + with ( + patch.object(distil_mod, "RayVirtualCluster", DummyCluster), + patch.object(distil_mod, "Logger"), + patch.object(distil_mod, "CheckpointManager") as mock_ckpt_mgr, + patch.object(distil_mod, "StatefulDataLoader"), + patch.object(distil_mod, "Policy", DummyPolicy), + patch.object(distil_mod, "VllmGeneration", DummyVllmGeneration), + patch.object(distil_mod, "ray") as mock_ray, + ): + mock_ckpt_mgr.return_value.get_latest_checkpoint_path.return_value = None + mock_ray.get = MagicMock(return_value=None) + + # Should not raise + result = distil_mod.setup(master_config, tokenizer, dataset, None) + + # Basic shape check of returned tuple + assert isinstance(result, tuple) + + +def test_noncolocated_inference_requires_explicit_gpus_per_node_multi_node(): + """Test that non-colocated inference requires explicit gpus_per_node when cluster.num_nodes>1.""" + from unittest.mock import MagicMock, patch + + from nemo_rl.algorithms.distillation import setup + + # Create minimal config with non-colocated inference but gpus_per_node=None + master_config = { + "policy": { + "generation": { + "backend": "vllm", + "colocated": { + "enabled": False, # Non-colocated + "resources": { + "gpus_per_node": None, # This should trigger error + "num_nodes": 1, # Use 1 node for inference + }, + }, + }, + "dtensor_cfg": { + "enabled": False, + }, + }, + "teacher": { + "dtensor_cfg": { + "enabled": False, + }, + }, + "loss_fn": {}, + "distillation": { + "seed": 42, + "topk_logits_k": 64, + "max_num_epochs": 10, + "max_num_steps": 100, + "num_prompts_per_step": 1, # Config 
extraction requires this key + "val_period": 0, # Config extraction requires this key + "val_at_start": False, # Config extraction requires this key + }, + "data": {"shuffle": False}, + "logger": {}, # Config extraction requires this key + "checkpointing": {}, # Config extraction requires this key + "cluster": { + "num_nodes": 2, # Multi-node + "gpus_per_node": 8, + }, + } + + tokenizer = MagicMock() + dataset = MagicMock() + dataset.__len__ = MagicMock(return_value=10) + + # Mock everything we don't need to test + with ( + patch("nemo_rl.algorithms.distillation.Logger") as mock_logger, + patch("nemo_rl.algorithms.distillation.CheckpointManager") as mock_checkpointer, + patch("nemo_rl.algorithms.distillation.StatefulDataLoader"), + pytest.raises( + AssertionError, + match="policy.generation.colocated.resources.gpus_per_node must be explicitly set", + ), + ): + # Configure mocks to skip checkpoint loading + mock_checkpointer.return_value.get_latest_checkpoint_path.return_value = None + setup(master_config, tokenizer, dataset, None) diff --git a/tests/unit/algorithms/test_dpo.py b/tests/unit/algorithms/test_dpo.py index 76565730b0..45d59fe990 100644 --- a/tests/unit/algorithms/test_dpo.py +++ b/tests/unit/algorithms/test_dpo.py @@ -12,16 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch +import numpy as np +import pytest import torch +from torchdata.stateful_dataloader import StatefulDataLoader -from nemo_rl.algorithms.dpo import add_ref_logprobs_to_data +from nemo_rl.algorithms.dpo import ( + _default_dpo_save_state, + add_ref_logprobs_to_data, + dpo_train, +) +from nemo_rl.algorithms.loss_functions import PreferenceLoss +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.named_sharding import NamedSharding class MockPolicy: def __init__(self, logprobs): self.logprobs = logprobs + self.sharding_annotations = NamedSharding( + layout=np.arange(2).reshape( + 1, # PP + -1, # DP + 1, # CP + 1, # TP + ), + names=[ + "pipeline_parallel", + "data_parallel", + "context_parallel", + "tensor_parallel", + ], + ) def get_reference_policy_logprobs(self, batch, micro_batch_size): return {"reference_logprobs": self.logprobs} @@ -30,7 +54,7 @@ def get_reference_policy_logprobs(self, batch, micro_batch_size): def test_add_logprobs_to_batch(): """Test that add_ref_logprobs_to_data correctly adds reference policy logprobs to batches.""" # Create mock data - batch_size = 2 + batch_size = 8 seq_len = 4 vocab_size = 16 @@ -45,7 +69,7 @@ def test_add_logprobs_to_batch(): # Create a mock dataloader that yields our mock batch mock_dataloader = MagicMock() - mock_dataloader.__iter__.return_value = iter([mock_batch]) + mock_dataloader.__iter__.return_value = iter([BatchedDataDict(mock_batch)]) # Create a mock policy that returns our mock logprobs mock_policy = MockPolicy(mock_logprobs) @@ -73,3 +97,222 @@ def test_add_logprobs_to_batch(): # Verify the logprobs were rolled by -1 as expected expected_logprobs = torch.roll(mock_logprobs, -1, dims=-1) assert torch.equal(augmented_batch["reference_policy_logprobs"], expected_logprobs) + + +@pytest.fixture +def mock_dpo_components(): + # Create mock components + policy = MagicMock() + policy.train.return_value 
= { + "loss": torch.tensor(0.5), + "grad_norm": torch.tensor(1.0), + "all_mb_metrics": { + "loss": [0.5], + "sft_loss": [0.3], + "preference_loss": [0.2], + "accuracy": [1.0], + "rewards_chosen_mean": [4.5], + "rewards_rejected_mean": [3.5], + "num_valid_samples": [1.0], + "global_valid_seqs": [1.0], + "global_valid_toks": [10], + }, + } + policy.get_reference_policy_logprobs.return_value = { + "reference_logprobs": torch.randn(2, 10) + } + policy.sharding_annotations = NamedSharding( + layout=np.arange(1).reshape(1, -1, 1, 1), # 1 GPU to match cluster config + names=[ + "pipeline_parallel", + "data_parallel", + "context_parallel", + "tensor_parallel", + ], + ) + + # Create a proper message log structure with token_ids + mock_batch = BatchedDataDict( + { + "message_log": [ + [ # chosen + {"role": "user", "token_ids": torch.tensor([1, 2, 3])}, + {"role": "assistant", "token_ids": torch.tensor([4, 5, 6])}, + ], + [ # rejected + {"role": "user", "token_ids": torch.tensor([1, 2, 3])}, + {"role": "assistant", "token_ids": torch.tensor([7, 8, 9, 10, 11])}, + ], + ], + "length": torch.tensor([6, 8]), + "loss_multiplier": torch.tensor([1.0, 1.0]), + } + ) + + # Create mock dataloader with 10 batches that can be iterated multiple times + train_dataloader = MagicMock(spec=StatefulDataLoader) + + def train_iter(self): + return iter([mock_batch] * 10) + + train_dataloader.__iter__ = train_iter + train_dataloader.__len__ = MagicMock(return_value=10) + + val_dataloader = MagicMock(spec=StatefulDataLoader) + + def val_iter(self): + return iter([mock_batch] * 10) + + val_dataloader.__iter__ = val_iter + val_dataloader.__len__ = MagicMock(return_value=10) + + tokenizer = MagicMock() + tokenizer.pad_token_id = 0 + + loss_fn = PreferenceLoss() + logger = MagicMock() + checkpointer = MagicMock() + + # Create mock master config + master_config = { + "dpo": { + "max_num_steps": 5, + "max_num_epochs": 2, + "val_period": 100, + "val_batches": 1, + "val_global_batch_size": 1, + 
"val_micro_batch_size": 1, + "val_at_start": False, + }, + "policy": { + "train_global_batch_size": 2, + "make_sequence_length_divisible_by": 1, + "reward_model_cfg": { + "enabled": True, + "reward_model_type": "bradley_terry", + }, + "train_micro_batch_size": 1, + }, + "checkpointing": { + "enabled": False, + "checkpoint_must_save_by": None, + "save_period": 10, + }, + "cluster": { + "num_nodes": 1, + "gpus_per_node": 1, + }, + } + + return { + "policy": policy, + "train_dataloader": train_dataloader, + "val_dataloader": val_dataloader, + "tokenizer": tokenizer, + "loss_fn": loss_fn, + "logger": logger, + "checkpointer": checkpointer, + "master_config": master_config, + } + + +def test_exit_on_max_steps(mock_dpo_components): + """Test that training loop exits when max_num_steps is reached""" + # Set max steps to 12, which is less than len(train_dataloader) * max_num_epochs + mock_dpo_components["master_config"]["dpo"]["max_num_steps"] = 12 + + dpo_save_state = _default_dpo_save_state() + + # Run training + dpo_train( + mock_dpo_components["policy"], + mock_dpo_components["train_dataloader"], + mock_dpo_components["val_dataloader"], + mock_dpo_components["tokenizer"], + mock_dpo_components["loss_fn"], + mock_dpo_components["master_config"], + mock_dpo_components["logger"], + mock_dpo_components["checkpointer"], + dpo_save_state, + ) + + # Verify we only trained for 12 steps. 
+ assert mock_dpo_components["policy"].train.call_count == 12 + + +def test_exit_on_max_epochs(mock_dpo_components): + """Test that training loop exits when max_num_epochs is reached""" + # Set max epochs to 2 and max steps to a large number + mock_dpo_components["master_config"]["dpo"]["max_num_epochs"] = 2 + mock_dpo_components["master_config"]["dpo"]["max_num_steps"] = 100 + + dpo_save_state = _default_dpo_save_state() + + # Run training + dpo_train( + mock_dpo_components["policy"], + mock_dpo_components["train_dataloader"], + mock_dpo_components["val_dataloader"], + mock_dpo_components["tokenizer"], + mock_dpo_components["loss_fn"], + mock_dpo_components["master_config"], + mock_dpo_components["logger"], + mock_dpo_components["checkpointer"], + dpo_save_state, + ) + + # Verify we trained for exactly two epochs (20 batches). + assert mock_dpo_components["policy"].train.call_count == 20 + + +def test_exit_on_timeout(mock_dpo_components, capsys): + """Test that training loop exits when timeout is reached""" + # Set max steps and epochs to large numbers + mock_dpo_components["master_config"]["dpo"]["max_num_steps"] = 100 + mock_dpo_components["master_config"]["dpo"]["max_num_epochs"] = 10 + + dpo_save_state = _default_dpo_save_state() + + # Mock TimeoutChecker to return False for first 7 checks, then True (timeout) + with patch("nemo_rl.algorithms.dpo.TimeoutChecker") as mock_timeout_class: + mock_timeout_instance = MagicMock() + # Create a side_effect that returns False 7 times, then True + check_results = [False] * 7 + [True] + mock_timeout_instance.check_save.side_effect = check_results + mock_timeout_class.return_value = mock_timeout_instance + + # Run training + dpo_train( + mock_dpo_components["policy"], + mock_dpo_components["train_dataloader"], + mock_dpo_components["val_dataloader"], + mock_dpo_components["tokenizer"], + mock_dpo_components["loss_fn"], + mock_dpo_components["master_config"], + mock_dpo_components["logger"], + 
mock_dpo_components["checkpointer"], + dpo_save_state, + ) + + # Verify training stopped at 8 steps (when check_save returned True) + assert mock_dpo_components["policy"].train.call_count == 8 + + # Verify the timeout message was printed and is near the end (not followed by more training) + captured = capsys.readouterr() + output_lines = captured.out.strip().split("\n") + + # Find the timeout message + timeout_line_idx = None + for i, line in enumerate(output_lines): + if "Timeout has been reached, stopping training early" in line: + timeout_line_idx = i + break + + assert timeout_line_idx is not None, "Timeout message not found in output" + + # Verify no new epoch started after timeout (which would indicate a bug where break was used instead of return) + remaining_lines = output_lines[timeout_line_idx:] + for line in remaining_lines: + assert "Epoch" not in line or "Epoch 1/10" in line, ( + f"Training continued to next epoch after timeout: {line}" + ) diff --git a/tests/unit/algorithms/test_grpo.py b/tests/unit/algorithms/test_grpo.py index b387d1e2f0..ab61c969fd 100644 --- a/tests/unit/algorithms/test_grpo.py +++ b/tests/unit/algorithms/test_grpo.py @@ -12,10 +12,21 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from unittest.mock import MagicMock, patch + import pytest import ray import torch +from torchdata.stateful_dataloader import StatefulDataLoader +from nemo_rl.algorithms.grpo import ( + _default_grpo_save_state, + async_grpo_train, + dynamic_sampling, + grpo_train, + normalize_advantages_with_epsilon, +) +from nemo_rl.algorithms.loss_functions import ClippedPGLossFn from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.environments.interfaces import ( @@ -23,6 +34,214 @@ EnvironmentReturn, ) from nemo_rl.experience.rollouts import calculate_rewards +from nemo_rl.utils.timer import Timer +from tests.unit.algorithms.utils import ( + create_mock_batch, +) + +# ============================================================================ +# Stub classes for async GRPO testing (non-Ray versions for easy mocking) +# ============================================================================ + + +class StubReplayBuffer: + """Non-Ray stub of ReplayBuffer for unit testing + + Each method returns a MagicMock with a 'remote' attribute that can be called. 
+ """ + + def __init__(self, initial_size=10, mock_batch=None, mock_rollout_metrics=None): + self._size = initial_size + self._trajectories = [] + self._mock_batch = mock_batch + self._mock_rollout_metrics = mock_rollout_metrics or {} + + @property + def size(self): + """Return a mock that returns buffer size when .remote() is called""" + mock = MagicMock() + mock.remote = MagicMock(return_value=self._size) # ray.get will extract this + return mock + + @property + def sample(self): + """Return a mock that returns sample result when .remote() is called""" + + def _sample(num_prompt_groups, current_weight_version, max_age_steps): + # Return proper trajectory structure expected by async GRPO + trajectories = [ + { + "batch": self._mock_batch, + "rollout_metrics": self._mock_rollout_metrics, + } + for _ in range(num_prompt_groups) + ] + return { + "trajectories": trajectories, + "avg_trajectory_age": 0.5, + } + + mock = MagicMock() + mock.remote = MagicMock( + side_effect=lambda *args, **kwargs: _sample(*args, **kwargs) + ) + return mock + + @property + def get_debug_info(self): + """Return a mock that returns debug info when .remote() is called""" + mock = MagicMock() + mock.remote = MagicMock( + return_value={ + "total_trajectories": self._size, + "trajectory_versions": [0], + "target_weight_versions": [0], + "max_size": 100, + } + ) + return mock + + +class StubAsyncTrajectoryCollector: + """Non-Ray stub of AsyncTrajectoryCollector for unit testing + + Each method is a property that returns a MagicMock with a 'remote' attribute. 
+ """ + + @property + def start_collection(self): + """Start collection - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) # Returns a fake ObjectRef + return mock + + @property + def set_weight_version(self): + """Set weight version - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) + return mock + + @property + def pause(self): + """Pause collection - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) + return mock + + @property + def resume(self): + """Resume collection - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) + return mock + + @property + def stop(self): + """Stop collection - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) + return mock + + @property + def wait_for_stop(self): + """Wait for stop - returns a remote-callable mock""" + mock = MagicMock() + mock.remote = MagicMock(return_value=MagicMock()) + return mock + + +def mock_async_grpo_infrastructure(mock_batch, mock_rollout_metrics): + """ + Context manager that mocks all async GRPO infrastructure (Ray actors, venv, etc). + + Returns a dict of patches that can be used as a context manager stack. 
+ """ + from contextlib import ExitStack + + stack = ExitStack() + + # Create stub instances with mock data + stub_buffer = StubReplayBuffer( + initial_size=10, + mock_batch=mock_batch, + mock_rollout_metrics=mock_rollout_metrics, + ) + stub_collector = StubAsyncTrajectoryCollector() + + # Patch venv creation + stack.enter_context( + patch( + "nemo_rl.algorithms.grpo.create_local_venv_on_each_node", + return_value="/fake/venv", + ) + ) + stack.enter_context( + patch( + "nemo_rl.algorithms.grpo.get_actor_python_env", return_value="/fake/python" + ) + ) + + # Patch Ray actor classes to return our stubs + mock_buffer_cls = MagicMock() + mock_buffer_cls.options.return_value.remote.return_value = stub_buffer + stack.enter_context( + patch("nemo_rl.algorithms.async_utils.ReplayBuffer", mock_buffer_cls) + ) + + mock_collector_cls = MagicMock() + mock_collector_cls.options.return_value.remote.return_value = stub_collector + stack.enter_context( + patch( + "nemo_rl.algorithms.async_utils.AsyncTrajectoryCollector", + mock_collector_cls, + ) + ) + + # Patch ray.get to return values from our stubs (not remote refs) + def mock_ray_get(ref): + # If it's already a plain value (from our stubs), return it + if isinstance(ref, (int, str, dict, list)): + return ref + # If it's a MagicMock, return a default response + return None + + stack.enter_context(patch("ray.get", side_effect=mock_ray_get)) + stack.enter_context( + patch("ray.wait", side_effect=lambda refs, **kwargs: (refs, [])) + ) + stack.enter_context( + patch("ray.kill", return_value=None) + ) # Mock ray.kill for cleanup + + # Patch the rollout functions used inside async_grpo_train + stack.enter_context( + patch( + "nemo_rl.algorithms.grpo.run_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ) + ) + stack.enter_context( + patch( + "nemo_rl.algorithms.grpo.run_async_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ) + ) + + # Patch refit and validate functions + 
stack.enter_context( + patch("nemo_rl.algorithms.grpo.refit_policy_generation", return_value=None) + ) + stack.enter_context( + patch("nemo_rl.algorithms.grpo.validate", return_value=({}, {})) + ) + + # Mock print_performance_metrics to avoid needing real timing metrics + stack.enter_context( + patch("nemo_rl.algorithms.grpo.print_performance_metrics", return_value={}) + ) + + return stack @ray.remote(num_cpus=0) @@ -41,6 +260,7 @@ def step( [[]] * len(messages), self.rewards, [True] * len(messages), + [None] * len(messages), ) def get_calls(self): @@ -56,26 +276,6 @@ def global_post_process_and_metrics( return batch, {} -def create_mock_batch( - num_samples: int, - task_names: list[str], - message_logs: list[LLMMessageLogType], - extra_env_info: list[dict] = None, -) -> BatchedDataDict[DatumSpec]: - """Helper function to create a mock batch for testing.""" - if extra_env_info is None: - extra_env_info = [{} for _ in range(num_samples)] - - return BatchedDataDict[DatumSpec]( - { - "task_name": task_names, - "message_log": message_logs, - "extra_env_info": extra_env_info, - "loss_multiplier": torch.ones(num_samples), - } - ) - - @pytest.fixture(scope="module") def mock_env(): """Create a mock environment for single task tests.""" @@ -116,7 +316,7 @@ def test_calculate_rewards_single_task(mock_env): batch = create_mock_batch(2, task_names, message_logs) # Calculate rewards - env_observations, metadata, next_stop_strings, rewards, terminateds = ( + env_observations, metadata, next_stop_strings, rewards, terminateds, answers = ( calculate_rewards(batch, task_to_env) ) @@ -126,6 +326,7 @@ def test_calculate_rewards_single_task(mock_env): assert len(terminateds) == 2 assert len(next_stop_strings) == 2 assert len(metadata) == 2 + assert len(answers) == 2 assert torch.allclose(rewards, torch.tensor([1.0, 2.0])) assert ( ray.get(mock_env.get_calls.remote()) == 1 @@ -151,7 +352,7 @@ def test_calculate_rewards_multiple_tasks(mock_envs): batch = create_mock_batch(4, 
task_names, message_logs) # Calculate rewards - env_observations, metadata, next_stop_strings, rewards, terminateds = ( + env_observations, metadata, next_stop_strings, rewards, terminateds, answers = ( calculate_rewards(batch, mock_envs) ) @@ -161,6 +362,7 @@ def test_calculate_rewards_multiple_tasks(mock_envs): assert len(terminateds) == 4 assert len(next_stop_strings) == 4 assert len(metadata) == 4 + assert len(answers) == 4 assert torch.allclose(rewards, torch.tensor([1.0, 2.0, 3.0, 4.0])) assert ( ray.get(mock_envs["math"].get_calls.remote()) == 1 @@ -178,7 +380,7 @@ def test_calculate_rewards_empty_batch(mock_env): batch = create_mock_batch(0, [], []) # Calculate rewards - env_observations, metadata, next_stop_strings, rewards, terminateds = ( + env_observations, metadata, next_stop_strings, rewards, terminateds, answers = ( calculate_rewards(batch, task_to_env) ) @@ -188,6 +390,7 @@ def test_calculate_rewards_empty_batch(mock_env): assert len(terminateds) == 0 assert len(next_stop_strings) == 0 assert len(metadata) == 0 + assert len(answers) == 0 assert ( ray.get(mock_env.get_calls.remote()) == 0 ) # Should not call environment for empty batch @@ -206,3 +409,940 @@ def test_calculate_rewards_missing_environment(): ValueError, match="No environment found for task type: unknown_task" ): calculate_rewards(batch, task_to_env) + + +def test_dapo_dynamic_sampling_filters_nonzero_std(): + """Test that DAPO dynamic sampling only selects prompts with non-zero standard deviation.""" + # Create mock batch data with 6 prompts (2 prompts * 3 generations each) + batch_size = 6 + message_logs = [ + [ + {"role": "user", "content": f"prompt_{i // 3}"}, + {"role": "assistant", "content": f"response_{i}"}, + ] + for i in range(batch_size) + ] + task_names = ["math"] * batch_size + + # Create batch with some prompts having zero std and others non-zero std + repeated_batch = create_mock_batch(batch_size, task_names, message_logs) + repeated_batch["total_reward"] = 
torch.tensor([1.0, 0.0, 1.0, 0.5, 0.5, 0.0]) + + # Mock prompts tensor (2 unique prompts, each repeated 3 times) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + + # First prompt group has std=0.5 (rewards: 1.0, 0.0, 1.0 -> std ≠ 0) + # Second prompt group has std=0.25 (rewards: 0.5, 0.5, 0.0 -> std ≠ 0) + std = torch.tensor( + [0.5, 0.5, 0.5, 0.25, 0.25, 0.25] + ) # Both prompts have non-zero std + baseline = torch.tensor([0.67, 0.67, 0.67, 0.33, 0.33, 0.33]) # Mock baselines + + # Configuration for dynamic sampling + master_config = { + "grpo": { + "use_dynamic_sampling": True, + "num_prompts_per_step": 2, # Want 2 prompts + "num_generations_per_prompt": 3, # Each with 3 generations + "dynamic_sampling_max_gen_batches": 5, + } + } + + timer = Timer() + dynamic_sampling_num_gen_batches = 1 + + # Test dynamic sampling + result_batch, is_batch_complete, batch_cache, _ = dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + ) + + # Since both prompts have non-zero std, all 6 samples should be selected + assert result_batch.size == 6 + assert is_batch_complete == True + assert torch.allclose(result_batch["std"], std) + assert torch.allclose(result_batch["baseline"], baseline) + + +def test_dapo_dynamic_sampling_filters_zero_std(): + """Test that DAPO dynamic sampling filters out prompts with zero standard deviation.""" + # Create mock batch data + batch_size = 6 + message_logs = [ + [ + {"role": "user", "content": f"prompt_{i // 3}"}, + {"role": "assistant", "content": f"response_{i}"}, + ] + for i in range(batch_size) + ] + task_names = ["math"] * batch_size + + repeated_batch = create_mock_batch(batch_size, task_names, message_logs) + repeated_batch["total_reward"] = torch.tensor( + [1.0, 1.0, 1.0, 0.5, 0.5, 0.0] + ) # First prompt has same rewards (std=0) + + # Mock 
prompts tensor + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + + # First prompt has zero std (all rewards are 1.0) + # Second prompt has non-zero std (rewards: 0.5, 0.5, 0.0) + std = torch.tensor( + [0.0, 0.0, 0.0, 0.25, 0.25, 0.25] + ) # First prompt has zero std, second has non-zero + baseline = torch.tensor([1.0, 1.0, 1.0, 0.33, 0.33, 0.33]) + + master_config = { + "grpo": { + "use_dynamic_sampling": True, + "num_prompts_per_step": 1, # Want 1 prompt only + "num_generations_per_prompt": 3, + "dynamic_sampling_max_gen_batches": 5, + } + } + + timer = Timer() + dynamic_sampling_num_gen_batches = 1 + + # Test dynamic sampling + result_batch, is_batch_complete, batch_cache, _ = dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + ) + + # Only the second prompt (indices 3,4,5) should be selected since first has zero std + assert result_batch.size == 3 # Only 3 samples from the second prompt + assert is_batch_complete == True + assert torch.allclose( + result_batch["std"], torch.tensor([0.25, 0.25, 0.25]) + ) # Only non-zero std + assert torch.allclose(result_batch["baseline"], torch.tensor([0.33, 0.33, 0.33])) + + ## verify that only prompt_1 is selected + prompts = [ + result_batch["message_log"][i][0]["content"] for i in range(result_batch.size) + ] + assert prompts == ["prompt_1", "prompt_1", "prompt_1"] + + # Verify that filtered rewards are correct + expected_filtered_rewards = torch.tensor( + [ + 0.5, + 0.5, + 0.0, + ] + ) + assert torch.allclose(result_batch["filtered_reward"], expected_filtered_rewards) + + +def test_dapo_dynamic_sampling_batch_caching(): + """Test that DAPO dynamic sampling uses batch caching when insufficient non-zero std prompts are found.""" + # Create mock batch with only 1 prompt having non-zero std, but we need 2 + batch_size = 3 + 
message_logs = [ + [ + {"role": "user", "content": "prompt_0"}, + {"role": "assistant", "content": f"response_{i}"}, + ] + for i in range(batch_size) + ] + task_names = ["math"] * batch_size + + repeated_batch = create_mock_batch(batch_size, task_names, message_logs) + repeated_batch["total_reward"] = torch.tensor([1.0, 0.0, 0.5]) # Non-zero std + + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + ] + ) + + std = torch.tensor([0.4, 0.4, 0.4]) # Only one prompt with non-zero std + baseline = torch.tensor([0.5, 0.5, 0.5]) + + master_config = { + "grpo": { + "use_dynamic_sampling": True, + "num_prompts_per_step": 2, # Need 2 prompts but only have 1 + "num_generations_per_prompt": 3, + "dynamic_sampling_max_gen_batches": 5, + } + } + + timer = Timer() + dynamic_sampling_num_gen_batches = 1 + + # Test dynamic sampling - should indicate batch is not complete + result_batch, is_batch_complete, batch_cache, _ = dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + ) + + # Should have cached the batch but marked as incomplete + assert ( + result_batch.size == 3 + ) # All samples from the single prompt with non-zero std + assert is_batch_complete == False # Not enough prompts, need to continue sampling + assert batch_cache is not None + assert batch_cache == result_batch + + # Run dynamic sampling again with the cached batch + result_batch, is_batch_complete, batch_cache, _ = dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + batch_cache, + ) + + # After running dynamic sampling again, the batch should be complete + assert ( + result_batch.size == 6 + ) # All samples from the single prompt with non-zero std + assert is_batch_complete == True + assert batch_cache is not None + + +def test_dapo_dynamic_sampling_disabled(): + """Test that when dynamic sampling is disabled, all prompts are kept 
regardless of std.""" + batch_size = 6 + message_logs = [ + [ + {"role": "user", "content": f"prompt_{i // 3}"}, + {"role": "assistant", "content": f"response_{i}"}, + ] + for i in range(batch_size) + ] + task_names = ["math"] * batch_size + + repeated_batch = create_mock_batch(batch_size, task_names, message_logs) + repeated_batch["total_reward"] = torch.tensor([1.0, 1.0, 1.0, 0.5, 0.5, 0.0]) + + prompts = torch.tensor( + [ + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [4, 5, 6], + [4, 5, 6], + [4, 5, 6], + ] + ) + + # Mix of zero and non-zero std + std = torch.tensor([0.0, 0.0, 0.0, 0.25, 0.25, 0.25]) + baseline = torch.tensor([1.0, 1.0, 1.0, 0.33, 0.33, 0.33]) + + # Disable dynamic sampling + master_config = { + "grpo": { + "use_dynamic_sampling": False, + "num_prompts_per_step": 2, + "num_generations_per_prompt": 3, + "dynamic_sampling_max_gen_batches": 5, + } + } + + timer = Timer() + dynamic_sampling_num_gen_batches = 1 + + # Test that dynamic sampling is bypassed + result_batch, is_batch_complete, batch_cache, _ = dynamic_sampling( + repeated_batch, + std, + baseline, + dynamic_sampling_num_gen_batches, + master_config, + timer, + ) + + # All samples should be kept when dynamic sampling is disabled + assert result_batch.size == 6 + assert is_batch_complete == True + assert batch_cache is None # No caching when disabled + + +def test_noncolocated_inference_requires_explicit_gpus_per_node_single_node(): + """Test that non-colocated inference requires explicit gpus_per_node when policy_nodes=1.""" + from unittest.mock import MagicMock, patch + + from nemo_rl.algorithms.grpo import setup + + # Create minimal config - only what's needed before the validation we're testing + master_config = { + "policy": { + "generation": { + "backend": "vllm", + "colocated": { + "enabled": False, # Non-colocated + "resources": { + "gpus_per_node": None, # This should trigger error + "num_nodes": None, + }, + }, + }, + }, + "loss_fn": {}, # Config extraction requires this key + "env": 
{}, # Config extraction requires this key + "grpo": { + "seed": 42, + "num_prompts_per_step": 1, + "val_period": 0, + "val_at_start": False, + "use_dynamic_sampling": False, + "batch_multiplier": 1, + }, + "data": {"shuffle": False, "num_workers": 1}, + "logger": {}, # Config extraction requires this key + "checkpointing": {}, # Config extraction requires this key + "cluster": { + "num_nodes": 1, # Single node, so policy_nodes=1 + "gpus_per_node": 8, + }, + } + + tokenizer = MagicMock() + dataset = MagicMock() + dataset.__len__ = MagicMock(return_value=10) + + # Mock everything we don't need to test + with ( + patch("nemo_rl.algorithms.grpo.Logger") as mock_logger, + patch("nemo_rl.algorithms.grpo.CheckpointManager") as mock_checkpointer, + patch("nemo_rl.algorithms.grpo.StatefulDataLoader"), + pytest.raises( + AssertionError, + match="policy.generation.colocated.resources.gpus_per_node must be explicitly set", + ), + ): + # Configure mocks to skip checkpoint loading + mock_checkpointer.return_value.get_latest_checkpoint_path.return_value = None + setup(master_config, tokenizer, dataset, None) + + +def test_noncolocated_inference_requires_explicit_gpus_per_node_multi_node(): + """Test that non-colocated inference requires explicit gpus_per_node when policy_nodes>1.""" + from unittest.mock import MagicMock, patch + + from nemo_rl.algorithms.grpo import setup + + # Create minimal config - only what's needed before the validation we're testing + master_config = { + "policy": { + "generation": { + "backend": "vllm", + "colocated": { + "enabled": False, # Non-colocated + "resources": { + "gpus_per_node": None, # This should trigger error + "num_nodes": 1, # Use 1 node for inference + }, + }, + }, + }, + "loss_fn": {}, # Config extraction requires this key + "env": {}, # Config extraction requires this key + "grpo": { + "seed": 42, + "num_prompts_per_step": 1, + "val_period": 0, + "val_at_start": False, + "use_dynamic_sampling": False, + "batch_multiplier": 1, + }, + 
"data": {"shuffle": False, "num_workers": 1}, + "logger": {}, # Config extraction requires this key + "checkpointing": {}, # Config extraction requires this key + "cluster": { + "num_nodes": 2, # Multi-node, so policy_nodes=1 after subtracting inference + "gpus_per_node": 8, + }, + } + + tokenizer = MagicMock() + dataset = MagicMock() + dataset.__len__ = MagicMock(return_value=10) + + # Mock everything we don't need to test + with ( + patch("nemo_rl.algorithms.grpo.Logger") as mock_logger, + patch("nemo_rl.algorithms.grpo.CheckpointManager") as mock_checkpointer, + patch("nemo_rl.algorithms.grpo.StatefulDataLoader"), + pytest.raises( + AssertionError, + match="policy.generation.colocated.resources.gpus_per_node must be explicitly set", + ), + ): + # Configure mocks to skip checkpoint loading + mock_checkpointer.return_value.get_latest_checkpoint_path.return_value = None + setup(master_config, tokenizer, dataset, None) + + +@pytest.fixture +def mock_grpo_components(): + # Create mock components + policy = MagicMock() + policy.train.return_value = { + "loss": torch.tensor(0.5), + "grad_norm": torch.tensor(1.0), + "all_mb_metrics": { + "loss": [0.5], + "policy_gradient_loss": [0.3], + "value_loss": [0.2], + "global_valid_toks": [10], + "token_mult_prob_error": [ + 1.0 + ], # Must be <= 1.05 to avoid logging extra plots + "gen_kl_error": [0.0001], + }, + } + policy.generate.return_value = { + "output_ids": torch.randint(0, 100, (2, 20)), + "generation_lengths": torch.tensor([10, 15]), + "unpadded_sequence_lengths": torch.tensor([12, 18]), + "logprobs": torch.randn(2, 20), + } + policy.prepare_for_training.return_value = None + # Mock sharding annotations for async GRPO + policy.sharding_annotations.get_axis_size.return_value = 1 # data_parallel size + + # Create mock batch with proper structure + mock_batch = BatchedDataDict[DatumSpec]( + { + "message_log": [ + [ + { + "role": "user", + "content": "test", + "token_ids": torch.tensor([1, 2, 3]), + }, + ] + ], + 
"task_name": ["math"], + "extra_env_info": [{}], + "loss_multiplier": torch.tensor([1.0]), + "idx": torch.tensor([0]), + "length": torch.tensor([3]), # Add length field for GRPO + "total_reward": torch.tensor( + [1.0] + ), # Add total_reward for rollout processing + } + ) + + # Create mock dataloader with 10 batches + train_dataloader = MagicMock(spec=StatefulDataLoader) + + def train_iter(self): + return iter([mock_batch] * 10) + + train_dataloader.__iter__ = train_iter + train_dataloader.__len__ = MagicMock(return_value=10) + + val_dataloader = MagicMock(spec=StatefulDataLoader) + + def val_iter(self): + return iter([mock_batch] * 10) + + val_dataloader.__iter__ = val_iter + val_dataloader.__len__ = MagicMock(return_value=10) + + tokenizer = MagicMock() + tokenizer.pad_token_id = 0 + + loss_fn = ClippedPGLossFn( + { + "reference_policy_kl_penalty": 0.01, + "reference_policy_kl_type": "k3", + "kl_input_clamp_value": 20.0, + "kl_output_clamp_value": 10.0, + "ratio_clip_min": 0.8, + "ratio_clip_max": 1.2, + "ratio_clip_c": 1.0, + "use_on_policy_kl_approximation": False, + "use_importance_sampling_correction": False, + "truncated_importance_sampling_ratio": None, + "sequence_level_importance_ratios": False, + "token_level_loss": True, + } + ) + logger = MagicMock() + checkpointer = MagicMock() + + # Create mock environment + task_to_env = {"math": MagicMock()} + val_task_to_env = {"math": MagicMock()} + + # Mock environment return values + for env in [task_to_env["math"], val_task_to_env["math"]]: + env.step.return_value = ( + [{"role": "environment", "content": "correct"}], # observations + [{}], # metadata + [[]], # next_stop_strings + [1.0], # rewards + [True], # terminateds + [None], # answers + ) + env.global_post_process_and_metrics.return_value = (mock_batch, {}) + + # Create mock master config + master_config = { + "grpo": { + "max_num_steps": 5, + "max_num_epochs": 2, + "num_prompts_per_step": 1, + "num_generations_per_prompt": 1, + "max_rollout_turns": 1, + 
"val_period": 100, + "val_batch_size": 1, + "val_at_start": False, + "max_val_samples": 10, + "seed": 42, + "advantage_normalization": "global", + "use_leave_one_out_baseline": False, + "normalize_rewards": False, + "overlong_filtering": False, + "reward_scaling": {"enabled": False}, + "reward_shaping": {"enabled": False}, + "use_dynamic_sampling": False, + "async_grpo": { + "enabled": False, + "max_trajectory_age_steps": 1, + }, + }, + "policy": { + "train_global_batch_size": 1, + "train_micro_batch_size": 1, + "max_total_sequence_length": 2048, + "make_sequence_length_divisible_by": 1, + "generation": { + "backend": "vllm", + "colocated": {"enabled": True}, + "vllm_cfg": {"async_engine": True}, # Support async mode + }, + }, + "loss_fn": { + "use_importance_sampling_correction": True, # Required for async mode + }, + "checkpointing": { + "enabled": False, + "checkpoint_must_save_by": None, + "save_period": 10, + }, + "cluster": { + "num_nodes": 1, + "gpus_per_node": 2, + }, + "logger": { + "num_val_samples_to_print": 5, + }, + } + + return { + "policy": policy, + "train_dataloader": train_dataloader, + "val_dataloader": val_dataloader, + "tokenizer": tokenizer, + "loss_fn": loss_fn, + "logger": logger, + "checkpointer": checkpointer, + "task_to_env": task_to_env, + "val_task_to_env": val_task_to_env, + "master_config": master_config, + } + + +@pytest.mark.parametrize("train_func", [grpo_train, async_grpo_train]) +def test_grpo_exit_on_max_steps(mock_grpo_components, train_func): + """Test that GRPO training loop exits when max_num_steps is reached""" + # Set max steps to 12 + mock_grpo_components["master_config"]["grpo"]["max_num_steps"] = 12 + grpo_save_state = _default_grpo_save_state() + + # Async GRPO requires non-colocated inference + if train_func == async_grpo_train: + mock_grpo_components["master_config"]["policy"]["generation"]["colocated"][ + "enabled" + ] = False + + # Prepare mock data + mock_rollout_metrics = { + "mean_gen_tokens_per_sample": 10.0, + 
"max_gen_tokens": 20, + "min_gen_tokens": 5, + } + mock_batch = next(iter(mock_grpo_components["train_dataloader"])) + + # Use our helper to mock async infrastructure if needed + if train_func == async_grpo_train: + with mock_async_grpo_infrastructure(mock_batch, mock_rollout_metrics): + train_func( + mock_grpo_components["policy"], + None, # policy_generation + mock_grpo_components["train_dataloader"], + mock_grpo_components["val_dataloader"], + mock_grpo_components["tokenizer"], + mock_grpo_components["loss_fn"], + mock_grpo_components["task_to_env"], + mock_grpo_components["val_task_to_env"], + mock_grpo_components["logger"], + mock_grpo_components["checkpointer"], + grpo_save_state, + mock_grpo_components["master_config"], + ) + else: + # For sync grpo_train, just mock the rollout functions + with patch( + "nemo_rl.algorithms.grpo.run_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ): + with patch( + "nemo_rl.algorithms.grpo.run_async_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ): + train_func( + mock_grpo_components["policy"], + None, # policy_generation + mock_grpo_components["train_dataloader"], + mock_grpo_components["val_dataloader"], + mock_grpo_components["tokenizer"], + mock_grpo_components["loss_fn"], + mock_grpo_components["task_to_env"], + mock_grpo_components["val_task_to_env"], + mock_grpo_components["logger"], + mock_grpo_components["checkpointer"], + grpo_save_state, + mock_grpo_components["master_config"], + ) + + # Verify we trained for exactly 12 steps + assert mock_grpo_components["policy"].train.call_count == 12 + + +@pytest.mark.parametrize( + "train_func", [grpo_train] +) # Only test sync version for epochs (async uses steps) +def test_grpo_exit_on_max_epochs(mock_grpo_components, train_func): + """Test that GRPO training loop exits when max_num_epochs is reached""" + # Set max epochs to 2 and max steps to a large number + 
mock_grpo_components["master_config"]["grpo"]["max_num_epochs"] = 2 + mock_grpo_components["master_config"]["grpo"]["max_num_steps"] = 100 + + grpo_save_state = _default_grpo_save_state() + + # Mock rollout functions to return proper metrics + mock_rollout_metrics = { + "mean_gen_tokens_per_sample": 10.0, + "max_gen_tokens": 20, + "min_gen_tokens": 5, + } + + # Get a mock batch to return + mock_batch = next(iter(mock_grpo_components["train_dataloader"])) + + with patch("nemo_rl.algorithms.grpo.run_multi_turn_rollout") as mock_rollout: + mock_rollout.return_value = (mock_batch, mock_rollout_metrics) + + with patch( + "nemo_rl.algorithms.grpo.run_async_multi_turn_rollout" + ) as mock_async_rollout: + mock_async_rollout.return_value = (mock_batch, mock_rollout_metrics) + + # Run training + train_func( + mock_grpo_components["policy"], + None, # policy_generation + mock_grpo_components["train_dataloader"], + mock_grpo_components["val_dataloader"], + mock_grpo_components["tokenizer"], + mock_grpo_components["loss_fn"], + mock_grpo_components["task_to_env"], + mock_grpo_components["val_task_to_env"], + mock_grpo_components["logger"], + mock_grpo_components["checkpointer"], + grpo_save_state, + mock_grpo_components["master_config"], + ) + + # Verify we trained for exactly two epochs (20 batches) + assert mock_grpo_components["policy"].train.call_count == 20 + + +@pytest.mark.parametrize("train_func", [grpo_train, async_grpo_train]) +def test_grpo_exit_on_timeout(mock_grpo_components, train_func, capsys): + """Test that GRPO training loop exits when timeout is reached""" + # Set max steps and epochs to large numbers + mock_grpo_components["master_config"]["grpo"]["max_num_steps"] = 100 + mock_grpo_components["master_config"]["grpo"]["max_num_epochs"] = 10 + grpo_save_state = _default_grpo_save_state() + + # Async GRPO requires non-colocated inference + if train_func == async_grpo_train: + mock_grpo_components["master_config"]["policy"]["generation"]["colocated"][ + 
"enabled" + ] = False + + # Prepare mock data + mock_rollout_metrics = { + "mean_gen_tokens_per_sample": 10.0, + "max_gen_tokens": 20, + "min_gen_tokens": 5, + } + mock_batch = next(iter(mock_grpo_components["train_dataloader"])) + + # Mock TimeoutChecker to return False for first 7 checks, then True (timeout) + with patch("nemo_rl.algorithms.grpo.TimeoutChecker") as mock_timeout_class: + mock_timeout_instance = MagicMock() + check_results = [False] * 7 + [True] + mock_timeout_instance.check_save.side_effect = check_results + mock_timeout_class.return_value = mock_timeout_instance + + # Use our helper for async, or simple mocking for sync + if train_func == async_grpo_train: + with mock_async_grpo_infrastructure(mock_batch, mock_rollout_metrics): + train_func( + mock_grpo_components["policy"], + None, # policy_generation + mock_grpo_components["train_dataloader"], + mock_grpo_components["val_dataloader"], + mock_grpo_components["tokenizer"], + mock_grpo_components["loss_fn"], + mock_grpo_components["task_to_env"], + mock_grpo_components["val_task_to_env"], + mock_grpo_components["logger"], + mock_grpo_components["checkpointer"], + grpo_save_state, + mock_grpo_components["master_config"], + ) + else: + with patch( + "nemo_rl.algorithms.grpo.run_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ): + with patch( + "nemo_rl.algorithms.grpo.run_async_multi_turn_rollout", + return_value=(mock_batch, mock_rollout_metrics), + ): + train_func( + mock_grpo_components["policy"], + None, # policy_generation + mock_grpo_components["train_dataloader"], + mock_grpo_components["val_dataloader"], + mock_grpo_components["tokenizer"], + mock_grpo_components["loss_fn"], + mock_grpo_components["task_to_env"], + mock_grpo_components["val_task_to_env"], + mock_grpo_components["logger"], + mock_grpo_components["checkpointer"], + grpo_save_state, + mock_grpo_components["master_config"], + ) + + # Verify training stopped at 8 steps (when check_save returned True) + 
assert mock_grpo_components["policy"].train.call_count == 8 + + # Verify the timeout message was printed and training actually stopped + captured = capsys.readouterr() + output_lines = captured.out.strip().split("\n") + + # Find the timeout message + timeout_line_idx = None + for i, line in enumerate(output_lines): + if "Timeout has been reached, stopping training early" in line: + timeout_line_idx = i + break + + assert timeout_line_idx is not None, "Timeout message not found in output" + + # Check what comes after the timeout message + remaining_lines = output_lines[timeout_line_idx + 1 :] + + # For async_grpo_train, we expect cleanup messages in the finally block + if train_func.__name__ == "async_grpo_train": + cleanup_found = any( + "Stopping trajectory collection" in line + or "Async GRPO training complete" in line + for line in remaining_lines + ) + assert cleanup_found, ( + "Expected cleanup messages after timeout in async mode" + ) + + # Verify no new epoch/step started after timeout + for line in remaining_lines: + assert "Epoch" not in line or "Epoch 1/10" in line, ( + f"Training continued to next epoch after timeout: {line}" + ) + assert not (line.startswith("Step ") and "Step 9" in line), ( + f"Training continued to next step after timeout: {line}" + ) + + +# ============================================================================ +# Tests for normalize_advantages_with_epsilon function +# ============================================================================ + + +def test_normalize_advantages_with_epsilon_basic(): + """Test basic functionality of normalize_advantages_with_epsilon.""" + # Test case with normal values + advantages = torch.tensor([[2.0], [4.0], [6.0]]) + std = torch.tensor([1.0, 2.0, 3.0]) + epsilon = 1e-6 + + result = normalize_advantages_with_epsilon(advantages, std, epsilon) + + expected = torch.tensor([[2.0], [2.0], [2.0]]) + assert torch.allclose(result, expected, rtol=1e-5) + + +def 
test_normalize_advantages_with_epsilon_zero_std(): + """Test normalize_advantages_with_epsilon when std contains zeros.""" + advantages = torch.tensor([[1.0], [2.0], [3.0]]) + std = torch.tensor([0.0, 1.0, 0.0]) # Zero std for indices 0 and 2 + epsilon = 1e-6 + + result = normalize_advantages_with_epsilon(advantages, std, epsilon) + + # When std=0 AND advantage!=0, normalization is skipped (advantages unchanged) + # When std>0, normal normalization occurs + expected = torch.tensor( + [[1.0], [2.0], [3.0]] + ) # Samples 0,2 unchanged; sample 1 normalized + assert torch.allclose(result, expected, rtol=1e-5) + + +def test_normalize_advantages_with_epsilon_all_zero_std(): + """Test normalize_advantages_with_epsilon when all std values are zero.""" + advantages = torch.tensor([[1.5], [2.5], [3.5]]) + std = torch.tensor([0.0, 0.0, 0.0]) + epsilon = 1e-8 + + # Save expected values BEFORE calling function (since it modifies in-place) + expected = advantages.clone() + + result = normalize_advantages_with_epsilon(advantages, std, epsilon) + + # When std=0 AND advantage!=0, normalization is skipped (all unchanged) + assert torch.allclose(result, expected, rtol=1e-5) + + +def test_normalize_advantages_with_epsilon_tensor_shapes(): + """Test normalize_advantages_with_epsilon with different tensor shapes.""" + # Test with batch size 1 + advantages = torch.tensor([[5.0]]) + std = torch.tensor([2.0]) + result = normalize_advantages_with_epsilon(advantages, std) + expected = torch.tensor([[2.5]]) + assert torch.allclose(result, expected, rtol=1e-5) + + # Test with larger batch + batch_size = 10 + advantages = torch.ones(batch_size, 1) * 3.0 + std = torch.ones(batch_size) * 1.5 + result = normalize_advantages_with_epsilon(advantages, std) + expected = torch.ones(batch_size, 1) * 2.0 + assert torch.allclose(result, expected, rtol=1e-5) + + +def test_normalize_advantages_with_epsilon_negative_advantages(): + """Test normalize_advantages_with_epsilon with negative advantages.""" + 
advantages = torch.tensor([[-2.0], [3.0], [-1.5]]) + std = torch.tensor([1.0, 1.5, 0.5]) + + result = normalize_advantages_with_epsilon(advantages, std) + + expected = torch.tensor([[-2.0], [2.0], [-3.0]]) + assert torch.allclose(result, expected, rtol=1e-5) + + +def test_normalize_advantages_with_zero_std_from_leave_one_out(): + """Test that zero std (from leave-one-out baseline) is handled gracefully by skipping normalization.""" + # Simulate the leave-one-out case: rewards [1.0, 0.0, 0.0, 0.0] + # Sample 0 has baseline from [0, 0, 0] -> std=0, advantage=1.0 + # Samples 1-3 have baseline from [1, 0, 0] -> std≈0.577, advantage≈-0.333 + advantages = torch.tensor([[1.0], [-0.333], [-0.333], [-0.333]]) + std = torch.tensor([0.0, 0.577, 0.577, 0.577]) + epsilon = 1e-6 + + # Compute expected values BEFORE calling function (since it modifies in-place) + expected_sample_0 = advantages[0].clone() + expected_normalized = advantages[1:].clone() / (std[1:].unsqueeze(-1) + epsilon) + + result = normalize_advantages_with_epsilon(advantages, std, epsilon) + + # Sample 0: std=0 -> advantage unchanged (skip normalization) + assert torch.allclose(result[0], expected_sample_0, rtol=1e-5) + + # Samples 1-3: std>0 -> normalized with epsilon + assert torch.allclose(result[1:], expected_normalized, rtol=1e-5) + + +def test_normalize_advantages_with_zero_std_and_zero_advantage(): + """Test that zero std with zero advantage is left unchanged.""" + advantages = torch.tensor([[0.0], [1.0], [0.0]]) + std = torch.tensor([0.0, 0.0, 1.0]) + epsilon = 1e-6 + + # Compute expected values BEFORE calling function (since it modifies in-place) + expected_sample_0 = advantages[0].clone() + expected_sample_1 = advantages[1].clone() + expected_sample_2 = advantages[2].clone() / (std[2] + epsilon) + + result = normalize_advantages_with_epsilon(advantages, std, epsilon) + + # Sample 0: std=0, advantage=0 -> unchanged (skip normalization) + assert torch.allclose(result[0], expected_sample_0, rtol=1e-5) + + 
# Sample 1: std=0, advantage!=0 -> unchanged (skip normalization) + assert torch.allclose(result[1], expected_sample_1, rtol=1e-5) + + # Sample 2: std>0 -> normalize with epsilon + assert torch.allclose(result[2], expected_sample_2, rtol=1e-5) + + +def test_normalize_advantages_with_small_nonzero_std(): + """Test that small but non-zero std values still get normalized (no threshold).""" + advantages = torch.tensor([[2.0], [3.0], [-1.0]]) + std = torch.tensor([0.001, 0.01, 0.0001]) # All small but non-zero + + # Compute expected values BEFORE calling function (since it modifies in-place) + expected = advantages.clone() / (std.unsqueeze(-1) + 1e-6) + + result = normalize_advantages_with_epsilon(advantages, std) + + # All should be normalized since std > 0 + assert torch.allclose(result, expected, rtol=1e-5) diff --git a/tests/unit/algorithms/test_loss_functions.py b/tests/unit/algorithms/test_loss_functions.py index 65ed834625..14c4e53880 100644 --- a/tests/unit/algorithms/test_loss_functions.py +++ b/tests/unit/algorithms/test_loss_functions.py @@ -11,17 +11,38 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+import itertools +from copy import deepcopy + import pytest import torch from nemo_rl.algorithms.loss_functions import ( + ClippedPGLossConfig, ClippedPGLossFn, + DistillationLossFn, DPOLossFn, NLLLoss, ) -from nemo_rl.algorithms.utils import masked_mean +from nemo_rl.algorithms.utils import calculate_kl, masked_mean from nemo_rl.distributed.batched_data_dict import BatchedDataDict +basic_pg_loss_test_config: ClippedPGLossConfig = { + "ratio_clip_min": 0.2, + "ratio_clip_max": 0.2, + "ratio_clip_c": None, + "disable_ppo_ratio": False, + "reference_policy_kl_penalty": 0.0, # Disable KL + "reference_policy_kl_type": "k3", + "kl_input_clamp_value": 20.0, + "kl_output_clamp_value": 10.0, + "use_on_policy_kl_approximation": False, + "use_importance_sampling_correction": False, + "truncated_importance_sampling_ratio": None, # Disable TIS + "sequence_level_importance_ratios": False, + "token_level_loss": True, +} + def setup_dpo_loss_test_data(vocab_size=16, batch_size=1): seq_len = 4 @@ -371,41 +392,45 @@ def _setup_clipped_pg_test_data(batch_size=1, seq_len=4, vocab_size=8, device="c } ) # Return seq_len and vocab_size needed by tests - return data, seq_len, vocab_size + return data, batch_size, seq_len, vocab_size # Helper to create logits that yield specific target log probs after log_softmax -def _create_exact_logits(target_curr_lp_masked, input_ids, seq_len, vocab_size, device): +def _create_exact_logits( + target_curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device +): """Constructs logits such that log_softmax results in target_curr_lp_masked.""" dummy_logits = torch.full( - (1, seq_len, vocab_size), -100.0, device=device + (batch_size, seq_len, vocab_size), -100.0, device=device ) # Start very low # Loss fn uses logits[:, :-1] and gathers based on next_tokens = input_ids[:, 1:] # We need to set logits for indices i=0..S-2 of the sliced logits tensor. # These correspond to target logprobs at indices 0..S-2 of target_curr_lp_masked. 
num_effective_pos = target_curr_lp_masked.shape[1] - for i in range(num_effective_pos): + for batch_idx, i in itertools.product(range(batch_size), range(num_effective_pos)): logit_idx = i # Index in the sliced logits tensor (dummy_logits[:, 0:S-1, :]) data_idx = i + 1 # Index in the original input_ids to find the target token - target_token_id = input_ids[0, data_idx].item() + target_token_id = input_ids[batch_idx, data_idx].item() # Keep target_lp as a 0-dim tensor for torch ops - target_lp = target_curr_lp_masked[0, i] + target_lp = target_curr_lp_masked[batch_idx, i] # Handle target_lp = 0 case separately if torch.isclose(target_lp, torch.tensor(0.0, device=device)): - dummy_logits[0, logit_idx, target_token_id] = 100.0 # Large positive logit + dummy_logits[batch_idx, logit_idx, target_token_id] = ( + 100.0 # Large positive logit + ) elif target_lp < 0: # Set target token logit to 0 - dummy_logits[0, logit_idx, target_token_id] = 0.0 + dummy_logits[batch_idx, logit_idx, target_token_id] = 0.0 # Set one distractor token logit using the formula distractor_token_id = (target_token_id + 1) % vocab_size # Ensure distractor isn't same as target if vocab_size=1 (edge case) if distractor_token_id == target_token_id: distractor_token_id = (target_token_id + 2) % vocab_size distractor_logit = torch.log(torch.exp(-target_lp) - 1.0) - dummy_logits[0, logit_idx, distractor_token_id] = distractor_logit + dummy_logits[batch_idx, logit_idx, distractor_token_id] = distractor_logit else: # target_lp > 0 is not supported by this method raise ValueError( "Target log probability must be negative or zero for this construction" @@ -420,19 +445,9 @@ def test_clipped_pg_loss_ppo_clipping(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) - - ratio_clip = 0.2 - cfg = { - "ratio_clip_min": ratio_clip, - "ratio_clip_max": ratio_clip, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.0, # Disable KL - 
"disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = basic_pg_loss_test_config loss_fn = ClippedPGLossFn(cfg) adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) @@ -456,7 +471,7 @@ def test_clipped_pg_loss_ppo_clipping(): ) ratios_clamped = torch.clamp( - ratios, 1.0 - ratio_clip, 1.0 + ratio_clip + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] ) # [0.8, 1.0, 1.2] assert torch.allclose( ratios_clamped, torch.tensor([[0.8, 1.0, 1.2]], device=device), rtol=1e-3 @@ -486,7 +501,7 @@ def test_clipped_pg_loss_ppo_clipping(): input_ids = data["input_ids"] dummy_logits = _create_exact_logits( - curr_lp_masked, input_ids, seq_len, vocab_size, device + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device ) actual_loss, _ = loss_fn( @@ -505,18 +520,12 @@ def test_clipped_pg_loss_reinforce_mode(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) - - cfg = { - "disable_ppo_ratio": True, - "reference_policy_kl_penalty": 0.0, - "ratio_clip_min": 0.0, # Placeholder, ignored - "ratio_clip_max": 0.0, # Placeholder, ignored - "ratio_clip_c": None, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["disable_ppo_ratio"] = True + cfg["ratio_clip_min"] = 0.0 + cfg["ratio_clip_max"] = 0.0 loss_fn = ClippedPGLossFn(cfg) adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) @@ -539,7 +548,7 @@ def test_clipped_pg_loss_reinforce_mode(): input_ids = data["input_ids"] dummy_logits = _create_exact_logits( - curr_lp_masked, input_ids, seq_len, vocab_size, device + curr_lp_masked, 
input_ids, batch_size, seq_len, vocab_size, device ) actual_loss, _ = loss_fn( @@ -553,6 +562,47 @@ def test_clipped_pg_loss_reinforce_mode(): torch.testing.assert_close(actual_loss, expected_loss) +@pytest.mark.parametrize("kl_type", ["k1", "k2", "k3"]) +def test_calculate_kl(kl_type): + """Tests KL calculations.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + logprobs = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + logprobs_reference = torch.tensor([[-0.0, -15.0, -30.0]], device=device) + + # test un-clamped KL + expected_kl = { + "k1": torch.tensor([[-1.0, 14.0, 29.0]], device=device), + "k2": torch.tensor([[0.5, 98.0, 420.5]], device=device), + "k3": torch.tensor([[0.7183, 13.0, 28.0]], device=device), + } + kl = calculate_kl( + logprobs=logprobs, + logprobs_reference=logprobs_reference, + kl_type=kl_type, + input_clamp_value=None, + output_clamp_value=None, + ) + assert torch.allclose(kl, expected_kl[kl_type], rtol=1e-3) + + # test clamped KL + expected_kl_clamped = { + "k1": torch.tensor([[-1.0, 10.0, 10.0]], device=device), + "k2": torch.tensor([[0.5, 10.0, 10.0]], device=device), + "k3": torch.tensor([[0.7183, 10.0, 10.0]], device=device), + } + kl_clamped = calculate_kl( + logprobs=logprobs, + logprobs_reference=logprobs_reference, + kl_type=kl_type, + input_clamp_value=20.0, + output_clamp_value=10.0, + ) + assert torch.allclose(kl_clamped, expected_kl_clamped[kl_type], rtol=1e-3) + + # Simplified KL Penalty Test using original Loss def test_clipped_pg_loss_kl_penalty(): """Tests KL penalty calculations directly.""" @@ -560,20 +610,11 @@ def test_clipped_pg_loss_kl_penalty(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) # --- Test Setup --- - kl_beta = 0.1 - cfg = { - "reference_policy_kl_penalty": kl_beta, - "ratio_clip_min": 0.2, - 
"ratio_clip_max": 0.2, - "ratio_clip_c": None, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + cfg = deepcopy(basic_pg_loss_test_config) + cfg["reference_policy_kl_penalty"] = 0.1 loss_fn = ClippedPGLossFn(cfg) adv_masked = torch.tensor([[0.0, 0.0, 0.0]], device=device) @@ -602,12 +643,12 @@ def test_clipped_pg_loss_kl_penalty(): expected_kl_mean, torch.tensor(0.362, device=device), rtol=1e-3 ) - expected_loss = kl_beta * expected_kl_mean # 0.0362 + expected_loss = cfg["reference_policy_kl_penalty"] * expected_kl_mean # 0.0362 assert torch.allclose(expected_loss, torch.tensor(0.0362, device=device), rtol=1e-3) input_ids = data["input_ids"] dummy_logits = _create_exact_logits( - curr_lp_masked, input_ids, seq_len, vocab_size, device + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device ) actual_loss, _ = loss_fn( @@ -632,7 +673,7 @@ def test_clipped_pg_loss_masking(): device = "cuda" # Use original loss function for masking tests, as it involves interactions # that the Testable class might obscure slightly. 
- data, seq_len, vocab_size = _setup_clipped_pg_test_data( + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data( batch_size=batch_size, seq_len=seq_len, device=device ) # Need some realistic-ish logits and logprobs for masking test @@ -645,16 +686,8 @@ def test_clipped_pg_loss_masking(): # Make advantages non-zero data["advantages"] = torch.randn_like(data["advantages"]) + 1.0 - cfg = { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.1, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + cfg = deepcopy(basic_pg_loss_test_config) + cfg["reference_policy_kl_penalty"] = 0.1 loss_fn = ClippedPGLossFn(cfg) # Use original loss fn # --- Test 1: Token Mask --- @@ -734,20 +767,12 @@ def test_clipped_pg_loss_zero_mask(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) # Need dummy logits dummy_logits = torch.randn(1, seq_len, vocab_size, device=device) - cfg = { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.1, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + cfg = deepcopy(basic_pg_loss_test_config) + cfg["reference_policy_kl_penalty"] = 0.1 loss_fn = ClippedPGLossFn(cfg) # Use original loss fn # Set token mask to all zeros @@ -772,21 +797,11 @@ def test_clipped_pg_loss_on_policy_kl_importance_sampling(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) - - ratio_clip = 0.2 - kl_beta = 0.1 - - cfg = { - "ratio_clip_min": ratio_clip, - "ratio_clip_max": ratio_clip, - "ratio_clip_c": None, - 
"reference_policy_kl_penalty": kl_beta, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": True, - "use_importance_sampling_correction": True, - "token_level_loss": True, - } + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["use_on_policy_kl_approximation"] = True + cfg["use_importance_sampling_correction"] = True loss_fn = ClippedPGLossFn(cfg) adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) @@ -823,7 +838,7 @@ def test_clipped_pg_loss_on_policy_kl_importance_sampling(): ) ratios_clamped = torch.clamp( - ratios, 1.0 - ratio_clip, 1.0 + ratio_clip + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] ) # [0.8, 1.0, 1.2] assert torch.allclose( ratios_clamped, torch.tensor([[0.8, 1.0, 1.2]], device=device), rtol=1e-3 @@ -897,7 +912,9 @@ def test_clipped_pg_loss_on_policy_kl_importance_sampling(): expected_kl_mean = torch.mean( importance_weighted_kl_term_per_token ) # mean([0.09308, 0.0, 0.08855]) = 0.060543 - expected_kl_loss = kl_beta * expected_kl_mean # 0.1 * 0.060543 = 0.0060543 + expected_kl_loss = ( + cfg["reference_policy_kl_penalty"] * expected_kl_mean + ) # 0.1 * 0.060543 = 0.0060543 expected_total_loss = ( expected_actor_loss + expected_kl_loss @@ -905,7 +922,7 @@ def test_clipped_pg_loss_on_policy_kl_importance_sampling(): input_ids = data["input_ids"] dummy_logits = _create_exact_logits( - curr_lp_masked, input_ids, seq_len, vocab_size, device + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device ) actual_loss, _ = loss_fn( @@ -917,6 +934,137 @@ def test_clipped_pg_loss_on_policy_kl_importance_sampling(): torch.testing.assert_close(actual_loss, expected_total_loss, atol=1e-4, rtol=1e-3) +@pytest.mark.parametrize("sequence_level_importance_ratios", [True, False]) +def test_clipped_pg_loss_on_policy_truncated_importance_sampling( + sequence_level_importance_ratios, +): + """Tests PPO loss with truncated importance 
sampling enabled.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["use_importance_sampling_correction"] = True + cfg["truncated_importance_sampling_ratio"] = 0.8 + if sequence_level_importance_ratios: + cfg["sequence_level_importance_ratios"] = True + cfg["token_level_loss"] = False + loss_fn = ClippedPGLossFn(cfg) + + adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) + prev_lp_masked = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + # approx log(0.5)-1, log(1)-1, log(1.5)-1 + curr_lp_masked = torch.tensor([[-1.69315, -1.0, -0.59453]], device=device) + ref_lp_masked = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + # for importance sampling + gen_lp_masked = torch.tensor([[-0.5, -1.5, -0.8]], device=device) + + # Fill full tensors + data["advantages"][0, 1:] = adv_masked + data["prev_logprobs"][0, 1:] = prev_lp_masked + data["generation_logprobs"][0, 1:] = gen_lp_masked + data["reference_policy_logprobs"][0, 1:] = ref_lp_masked + + # --- Hand Calculation --- + + # sequence-level: [[0.9086, 0.9086, 0.9086]] + # token-level: [[0.5, 1.0, 1.5]] + if sequence_level_importance_ratios: + log_ratios = curr_lp_masked - prev_lp_masked + seq_log_ratios_mean = torch.mean(log_ratios, dim=-1).unsqueeze(-1) + ratios = seq_log_ratios_mean.exp().repeat(1, adv_masked.shape[1]) + else: + ratios = torch.exp(curr_lp_masked - prev_lp_masked) + + # sequence-level: [[0.9086, 0.9086, 0.9086]] + # token-level: [[0.8, 1.0, 1.2]] + clip_min = cfg["ratio_clip_min"] + clip_max = cfg["ratio_clip_max"] + ratios_clamped = torch.clamp(ratios, 1.0 - clip_min, 1.0 + clip_max) + + # sequence-level: [[-0.9086, 0.9086, -1.8171]] + # token-level: [[-0.5, 1.0, -3.0]] + loss1 = -adv_masked * ratios + + # sequence-level: [[-0.9086, 0.9086, -1.8171]] + # token-level: [[-0.8, 1.0, -2.4]] + loss2 = -adv_masked * 
ratios_clamped + + # sequence-level: [[-0.9086, 0.9086, -1.8171]] + # token-level: [[-0.5, 1.0, -2.4]] + max_loss = torch.maximum(loss1, loss2) + if sequence_level_importance_ratios: + assert torch.allclose( + max_loss, + torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), + rtol=1e-3, + ) + else: + assert torch.allclose( + max_loss, + torch.tensor([[-0.5, 1.0, -2.4]], device=device), + rtol=1e-3, + ) + + # sequence-level: [[0.8187]] + # token-level: [[0.6065, 1.6487, 0.8187]] + if sequence_level_importance_ratios: + actor_importance_weights = torch.exp( + (prev_lp_masked - gen_lp_masked).sum(dim=-1).unsqueeze(-1) + ) + else: + actor_importance_weights = torch.exp(prev_lp_masked - gen_lp_masked) + + # sequence-level: [[0.8000]] + # token-level: [[0.6065, 0.8000, 0.8000]] + truncated_actor_importance_weights = torch.clamp( + actor_importance_weights, max=cfg["truncated_importance_sampling_ratio"] + ) + + # sequence-level: [[-0.7268, 0.7268, -1.4537]] + # token-level: [[-0.3033, 0.8000, -1.9200]] + importance_weighted_max_loss = truncated_actor_importance_weights * max_loss + if sequence_level_importance_ratios: + assert torch.allclose( + importance_weighted_max_loss, + torch.tensor([[-0.7268, 0.7268, -1.4537]], device=device), + rtol=1e-3, + ) + else: + assert torch.allclose( + importance_weighted_max_loss, + torch.tensor([[-0.3033, 0.8000, -1.9200]], device=device), + rtol=1e-3, + ) + + # sequence-level: -0.4846 + # token-level: -0.4744 + expected_loss = torch.mean(importance_weighted_max_loss) + if sequence_level_importance_ratios: + assert torch.allclose( + expected_loss, torch.tensor(-0.4846, device=device), rtol=1e-3 + ) + else: + assert torch.allclose( + expected_loss, torch.tensor(-0.4744, device=device), rtol=1e-3 + ) + + input_ids = data["input_ids"] + dummy_logits = _create_exact_logits( + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device + ) + + actual_loss, _ = loss_fn( + dummy_logits, + data, + 
global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum(data["sample_mask"] * data["token_mask"]), + ) + torch.testing.assert_close(actual_loss, expected_loss, atol=1e-4, rtol=1e-3) + + def test_masked_mean_all_zeros(): """Test masked_mean function with all zeros mask.""" values = torch.tensor([1.0, 2.0, 3.0, 4.0]) @@ -954,20 +1102,10 @@ def test_clipped_pg_loss_dual_clip(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) - - ratio_clip = 0.2 - ratio_clip_c = 3.0 - cfg = { - "ratio_clip_min": ratio_clip, - "ratio_clip_max": ratio_clip, - "ratio_clip_c": ratio_clip_c, - "reference_policy_kl_penalty": 0.0, # Disable KL - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["ratio_clip_c"] = 3.0 loss_fn = ClippedPGLossFn(cfg) # Create test data with a mix of advantages: positive, slightly negative, strongly negative @@ -991,7 +1129,7 @@ def test_clipped_pg_loss_dual_clip(): # --- Hand Calculation --- # Actor Loss Calculation ratios_clamped = torch.clamp( - ratios, 1.0 - ratio_clip, 1.0 + ratio_clip + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] ) # [0.8, 1.0, 1.2] assert torch.allclose( ratios_clamped, torch.tensor([[0.8, 1.0, 1.2]], device=device), rtol=1e-3 @@ -1014,7 +1152,9 @@ def test_clipped_pg_loss_dual_clip(): ) # Dual clipping - loss3 = -adv_masked * ratio_clip_c # -[1*3.0, -1*3.0, -4*3.0] = [-3.0, 3.0, 12.0] + loss3 = ( + -adv_masked * cfg["ratio_clip_c"] + ) # -[1*3.0, -1*3.0, -4*3.0] = [-3.0, 3.0, 12.0] assert torch.allclose( loss3, torch.tensor([[-3.0, 3.0, 12.0]], device=device), rtol=1e-3 ) @@ -1034,7 +1174,7 @@ def test_clipped_pg_loss_dual_clip(): input_ids = data["input_ids"] dummy_logits = 
_create_exact_logits( - curr_lp_masked, input_ids, seq_len, vocab_size, device + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device ) actual_loss, _ = loss_fn( @@ -1054,18 +1194,9 @@ def test_clipped_pg_loss_entropy(): pytest.skip("No GPU available") device = "cuda" - data, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) - - cfg = { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.0, # Disable KL for simplicity - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, # This flag does not affect entropy calculation - "token_level_loss": True, - } + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = basic_pg_loss_test_config loss_fn = ClippedPGLossFn(cfg) # Log probs for 3 tokens (default token_mask is [0, 1, 1, 1], so 3 unmasked after slicing) @@ -1092,7 +1223,7 @@ def test_clipped_pg_loss_entropy(): ) # torch.mean because default mask applies to these 3 terms dummy_logits = _create_exact_logits( - curr_lp_masked, data["input_ids"], seq_len, vocab_size, device + curr_lp_masked, data["input_ids"], batch_size, seq_len, vocab_size, device ) _, metrics = loss_fn( dummy_logits, @@ -1107,3 +1238,658 @@ def test_clipped_pg_loss_entropy(): rtol=1e-3, atol=1e-5, ) + + +def test_clipped_pg_loss_gspo(): + """Tests GSPO path in ClippedPGLossFn.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["sequence_level_importance_ratios"] = True + cfg["token_level_loss"] = False + loss_fn = ClippedPGLossFn(cfg) + + adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) + # Use non-zero prev_lp to allow ratios > 1 with valid curr_lp <= 0 + prev_lp_masked = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + # 
Target Curr logprobs (masked pos 1, 2, 3) - design for clipping + # Target ratios: 0.5 (<0.8), 1.0 (in [0.8, 1.2]), 1.5 (>1.2) + # Curr = log(Ratio) + Prev + curr_lp_masked = torch.tensor( + [[-1.69315, -1.0, -0.59453]], device=device + ) # approx log(0.5)-1, log(1)-1, log(1.5)-1 + + # Fill full tensors (only need first dim for B=1) + data["advantages"][0, 1:] = adv_masked + data["prev_logprobs"][0, 1:] = prev_lp_masked + + # --- Hand Calculation --- + log_ratios = curr_lp_masked - prev_lp_masked + seq_log_ratios_mean = torch.mean(log_ratios, dim=-1).unsqueeze(-1) + ratios = seq_log_ratios_mean.exp().repeat(1, 3) + assert torch.allclose( + ratios, torch.tensor([[0.9086, 0.9086, 0.9086]], device=device), rtol=1e-3 + ) + + ratios_clamped = torch.clamp( + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] + ) + assert torch.allclose( + ratios_clamped, + torch.tensor([[0.9086, 0.9086, 0.9086]], device=device), + rtol=1e-3, + ) + + loss1 = -adv_masked * ratios + assert torch.allclose( + loss1, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + loss2 = -adv_masked * ratios_clamped + assert torch.allclose( + loss2, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + max_loss = torch.maximum(loss1, loss2) + assert torch.allclose( + max_loss, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + expected_loss = torch.mean(max_loss) + assert torch.allclose( + expected_loss, torch.tensor(-0.6057, device=device), rtol=1e-3 + ) + + input_ids = data["input_ids"] + dummy_logits = _create_exact_logits( + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device + ) + + actual_loss, _ = loss_fn( + dummy_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum(data["sample_mask"] * data["token_mask"]), + ) + torch.testing.assert_close(actual_loss, expected_loss) + + +def test_clipped_pg_loss_gspo_batch_size_2(): + """Tests non-unit batch size GSPO 
path in ClippedPGLossFn.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data( + batch_size=2, device=device + ) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["sequence_level_importance_ratios"] = True + cfg["token_level_loss"] = False + loss_fn = ClippedPGLossFn(cfg) + + adv_masked = torch.tensor([[1.0, -1.0, 2.0], [1.0, -1.0, 2.0]], device=device) + # Use non-zero prev_lp to allow ratios > 1 with valid curr_lp <= 0 + prev_lp_masked = torch.tensor( + [[-1.0, -1.0, -1.0], [-2.0, -2.0, -2.0]], device=device + ) + # Target Curr logprobs (masked pos 1, 2, 3) - design for clipping + # Target ratios: 0.5 (<0.8), 1.0 (in [0.8, 1.2]), 1.5 (>1.2) + # Curr = log(Ratio) + Prev + curr_lp_masked = torch.tensor( + [[-1.69315, -1.0, -0.59453], [-1.69315, -1.0, -0.59453]], device=device + ) # approx log(0.5)-1, log(1)-1, log(1.5)-1 + + # Fill full tensors (only need first dim for B=1) + data["advantages"][:, 1:] = adv_masked + data["prev_logprobs"][:, 1:] = prev_lp_masked + + # --- Hand Calculation --- + log_ratios = curr_lp_masked - prev_lp_masked + seq_log_ratios_mean = torch.mean(log_ratios, dim=-1).unsqueeze(-1) + ratios = seq_log_ratios_mean.exp().repeat(1, 3) + assert torch.allclose( + ratios, + torch.tensor( + [[0.9086, 0.9086, 0.9086], [2.4697, 2.4697, 2.4697]], device=device + ), + rtol=1e-3, + ) + + ratios_clamped = torch.clamp( + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] + ) + assert torch.allclose( + ratios_clamped, + torch.tensor([[0.9086, 0.9086, 0.9086], [1.2, 1.2, 1.2]], device=device), + rtol=1e-3, + ) + + loss1 = -adv_masked * ratios + assert torch.allclose( + loss1, + torch.tensor( + [[-0.9086, 0.9086, -1.8171], [-2.4697, 2.4697, -4.9394]], device=device + ), + rtol=1e-3, + ) + + loss2 = -adv_masked * ratios_clamped + assert torch.allclose( + loss2, + torch.tensor( + [[-0.9086, 0.9086, -1.8171], [-1.2000, 1.2000, -2.4000]], 
device=device + ), + rtol=1e-3, + ) + + max_loss = torch.maximum(loss1, loss2) + assert torch.allclose( + max_loss, + torch.tensor( + [[-0.9086, 0.9086, -1.8171], [-1.2000, 2.4697, -2.4000]], device=device + ), + rtol=1e-3, + ) + + expected_loss = torch.mean(max_loss) + assert torch.allclose( + expected_loss, torch.tensor(-0.4912, device=device), rtol=1e-3 + ) + + input_ids = data["input_ids"] + dummy_logits = _create_exact_logits( + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device + ) + + actual_loss, _ = loss_fn( + dummy_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(1) * data["token_mask"] + ), + ) + torch.testing.assert_close(actual_loss, expected_loss) + + +def test_clipped_pg_loss_gspo_importance_sampling_correction(): + """Tests GSPO w/ importance sampling correction in ClippedPGLossFn.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + data, batch_size, seq_len, vocab_size = _setup_clipped_pg_test_data(device=device) + + cfg = deepcopy(basic_pg_loss_test_config) + cfg["use_importance_sampling_correction"] = True + cfg["sequence_level_importance_ratios"] = True + cfg["token_level_loss"] = False + loss_fn = ClippedPGLossFn(cfg) + + adv_masked = torch.tensor([[1.0, -1.0, 2.0]], device=device) + prev_lp_masked = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + curr_lp_masked = torch.tensor( + [[-1.69315, -1.0, -0.59453]], device=device + ) # approx log(0.5)-1, log(1)-1, log(1.5)-1 + + ref_lp_masked = torch.tensor([[-1.0, -1.0, -1.0]], device=device) + + # For Importance Sampling + gen_lp_masked = torch.tensor([[-0.5, -1.5, -0.8]], device=device) + + # Fill full tensors + data["advantages"][0, 1:] = adv_masked + data["prev_logprobs"][0, 1:] = prev_lp_masked + data["generation_logprobs"][0, 1:] = gen_lp_masked + data["reference_policy_logprobs"][0, 1:] = ref_lp_masked + + # --- Hand Calculation --- + # Actor Loss 
Calculation + actor_importance_weights = torch.exp( + (prev_lp_masked - gen_lp_masked).sum(dim=-1).unsqueeze(-1) + ) # exp([-1 - (-0.5), -1 - (-1.5), -1 - (-0.8)]) = [0.6065, 1.6487, 0.8187] + assert torch.allclose( + actor_importance_weights, + torch.tensor([[0.8187]], device=device), + rtol=1e-3, + ) + + log_ratios = curr_lp_masked - prev_lp_masked + seq_log_ratios_mean = torch.mean(log_ratios, dim=-1).unsqueeze(-1) + ratios = seq_log_ratios_mean.exp().repeat(1, 3) + assert torch.allclose( + ratios, torch.tensor([[0.9086, 0.9086, 0.9086]], device=device), rtol=1e-3 + ) + + ratios_clamped = torch.clamp( + ratios, 1.0 - cfg["ratio_clip_min"], 1.0 + cfg["ratio_clip_max"] + ) + assert torch.allclose( + ratios_clamped, + torch.tensor([[0.9086, 0.9086, 0.9086]], device=device), + rtol=1e-3, + ) + + loss1 = -adv_masked * ratios + assert torch.allclose( + loss1, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + loss2 = -adv_masked * ratios_clamped + assert torch.allclose( + loss2, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + max_loss = torch.maximum(loss1, loss2) + assert torch.allclose( + max_loss, torch.tensor([[-0.9086, 0.9086, -1.8171]], device=device), rtol=1e-3 + ) + + importance_weighted_max_loss = actor_importance_weights * max_loss + assert torch.allclose( + importance_weighted_max_loss, + torch.tensor([[-0.7439, 0.7439, -1.4877]], device=device), + rtol=1e-3, + ) + + expected_actor_loss = torch.mean(importance_weighted_max_loss) + assert torch.allclose( + expected_actor_loss, torch.tensor(-0.4959, device=device), rtol=1e-3 + ) + + input_ids = data["input_ids"] + dummy_logits = _create_exact_logits( + curr_lp_masked, input_ids, batch_size, seq_len, vocab_size, device + ) + + actual_loss, _ = loss_fn( + dummy_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum(data["sample_mask"] * data["token_mask"]), + ) + torch.testing.assert_close(actual_loss, 
expected_actor_loss, atol=1e-4, rtol=1e-3) + + +def setup_distillation_test_data(batch_size=2, seq_len=4, vocab_size=8, topk=64): + """Setup test data for distillation loss function tests.""" + if not torch.cuda.is_available(): + pytest.skip("No GPU available") + + device = "cuda" + + # Create input data + input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), device=device) + input_lengths = torch.tensor([seq_len] * batch_size, device=device) + token_mask = torch.ones((batch_size, seq_len), device=device) + sample_mask = torch.ones(batch_size, device=device) + + # Create teacher top-k logits and indices + teacher_topk_logits = torch.randn((batch_size, seq_len, topk), device=device) + teacher_topk_indices = torch.randint( + 0, vocab_size, (batch_size, seq_len, topk), device=device + ) + + data = { + "input_ids": input_ids, + "input_lengths": input_lengths, + "token_mask": token_mask, + "sample_mask": sample_mask, + "teacher_topk_logits": teacher_topk_logits, + "teacher_topk_indices": teacher_topk_indices, + } + + # Create student logits + student_logits = torch.randn((batch_size, seq_len, vocab_size), device=device) + + return data, student_logits + + +def test_distillation_loss_forward_kl(): + """Test forward KL divergence loss calculation.""" + data, student_logits = setup_distillation_test_data() + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify loss is a scalar tensor + assert loss.dim() == 0 + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # Verify metrics dictionary + assert isinstance(metrics, dict) + assert "loss" in metrics + + +def test_distillation_loss_reverse_kl(): + """Test reverse KL divergence loss calculation.""" + data, student_logits 
= setup_distillation_test_data() + + loss_fn = DistillationLossFn( + { + "kl_type": "reverse", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify loss is a scalar tensor + assert loss.dim() == 0 + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # Verify metrics dictionary + assert isinstance(metrics, dict) + assert "loss" in metrics + + +def test_distillation_loss_mixed_kl(): + """Test mixed KL divergence loss calculation.""" + data, student_logits = setup_distillation_test_data() + + mixed_kl_weight = 0.3 + loss_fn = DistillationLossFn( + { + "kl_type": "mixed", + "mixed_kl_weight": mixed_kl_weight, + "zero_outside_topk": False, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify loss is a scalar tensor + assert loss.dim() == 0 + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # Verify metrics dictionary + assert isinstance(metrics, dict) + assert "loss" in metrics + + +def test_distillation_loss_topk_filtering(): + """Test top-k filtering functionality with various k values.""" + # Test with different k values (excluding k=0 which should be invalid) + k_values = [1, 32, 64, 1000000] # Valid k values + + for k in k_values: + data, student_logits = setup_distillation_test_data(topk=k) + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify loss is 
calculated correctly with top-k filtering + assert loss.dim() == 0 + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # For k=1, we expect only the top-1 token to be considered + if k == 1: + assert isinstance(loss, torch.Tensor) + + # For large k values, we expect normal behavior + if k >= 32: + assert isinstance(loss, torch.Tensor) + assert loss.item() != 0.0 # Should have some meaningful loss + + +def test_distillation_loss_invalid_k_zero(): + """Test that k=0 should raise a ValueError.""" + # Test with k=0 which should be invalid + data, student_logits = setup_distillation_test_data(topk=0) + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + # This should raise a ValueError for k=0 + with pytest.raises(ValueError, match="topk must be positive"): + loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + +def test_distillation_loss_zero_outside_topk(): + """Test zeroing outside top-k functionality with various k values.""" + # Test with different k values for zero_outside_topk (excluding k=0 which should be invalid) + k_values = [1, 32, 64, 1000000] # Valid k values + + for k in k_values: + data, student_logits = setup_distillation_test_data(topk=k) + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": True, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify loss is calculated correctly with zeroing + assert loss.dim() == 0 + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # For k=1, only top-1 token should remain non-zero + if k == 1: + assert isinstance(loss, torch.Tensor) + + # For large k values, 
most tokens should remain non-zero + if k >= 32: + assert isinstance(loss, torch.Tensor) + assert loss.item() != 0.0 # Should have some meaningful loss + + +def test_distillation_loss_gradient_flow(): + """Test gradient flow in distillation loss function.""" + data, student_logits = setup_distillation_test_data() + + # Make student_logits require gradients + student_logits.requires_grad_(True) + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + loss, _ = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Compute gradients + loss.backward() + + # Verify gradients are computed and non-zero + assert student_logits.grad is not None + assert not torch.allclose( + student_logits.grad, torch.zeros_like(student_logits.grad) + ) + + +def test_distillation_loss_edge_cases(): + """Test distillation loss with edge cases.""" + data, student_logits = setup_distillation_test_data() + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + # Test with all-zero logits + zero_logits = torch.zeros_like(student_logits) + loss, _ = loss_fn( + zero_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # Test with very large logits + large_logits = torch.ones_like(student_logits) * 100.0 + loss, _ = loss_fn( + large_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + # Test with very small logits + small_logits = torch.ones_like(student_logits) * -100.0 + 
loss, _ = loss_fn( + small_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + assert not torch.isnan(loss) + assert not torch.isinf(loss) + + +def test_distillation_loss_fn_initialization(): + """Test DistillationLossFn initialization.""" + # Test with default values + default_config = { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + loss_fn = DistillationLossFn(default_config) + assert loss_fn.kl_type == "forward" + assert loss_fn.mixed_kl_weight == 0.5 + assert not loss_fn.zero_outside_topk + + # Test with custom values + custom_config = { + "kl_type": "reverse", + "mixed_kl_weight": 0.3, + "zero_outside_topk": True, + } + loss_fn = DistillationLossFn(custom_config) + assert loss_fn.kl_type == "reverse" + assert loss_fn.mixed_kl_weight == 0.3 + assert loss_fn.zero_outside_topk + + +def test_distillation_loss_fn_call(): + """Test DistillationLossFn call interface.""" + data, student_logits = setup_distillation_test_data() + + loss_fn = DistillationLossFn( + { + "kl_type": "forward", + "mixed_kl_weight": 0.5, + "zero_outside_topk": False, + } + ) + + loss, metrics = loss_fn( + student_logits, + data, + global_valid_seqs=torch.sum(data["sample_mask"]), + global_valid_toks=torch.sum( + data["sample_mask"].unsqueeze(-1) * data["token_mask"] + ), + ) + + # Verify return types + assert isinstance(loss, torch.Tensor) + assert isinstance(metrics, dict) + + # Verify loss is scalar + assert loss.dim() == 0 + + # Verify metrics contains expected fields + expected_fields = ["loss"] + for field in expected_fields: + assert field in metrics diff --git a/tests/unit/algorithms/test_reward_functions.py b/tests/unit/algorithms/test_reward_functions.py new file mode 100755 index 0000000000..5d71bb3d8d --- /dev/null +++ b/tests/unit/algorithms/test_reward_functions.py @@ -0,0 +1,274 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. 
All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import torch + +from nemo_rl.algorithms.grpo import RewardScalingConfig, scale_rewards +from nemo_rl.algorithms.reward_functions import ( + RewardShapingConfig, + apply_reward_shaping, +) +from nemo_rl.data.interfaces import DatumSpec +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from tests.unit.algorithms.utils import create_mock_batch_with_responses + + +def test_reward_scaling_disabled(): + """Test that when reward scaling is disabled, rewards remain unchanged.""" + batch = create_mock_batch_with_responses( + num_samples=3, response_lengths=[10, 20, 30], initial_rewards=[1.0, 0.5, 0.8] + ) + + original_rewards = batch["total_reward"].clone() + config = RewardScalingConfig(enabled=False) + result_batch = scale_rewards(batch, config) + assert torch.allclose(result_batch["total_reward"], original_rewards) + assert result_batch is batch # Should return the same batch object + + +def test_reward_scaling_base(): + """Test that rewards are linearly scaled from [0.0, 1.0] to [0.0, 0.7].""" + batch = create_mock_batch_with_responses( + num_samples=3, response_lengths=[10, 20, 30], initial_rewards=[1.0, 0.5, 0.8] + ) + + config = RewardScalingConfig( + enabled=True, source_min=0.0, source_max=1.0, target_min=0.0, target_max=0.7 + ) + + result_batch = scale_rewards(batch, config) + # Calculate expected rewards manually + # Response 0: length=10, initial_reward=1.0, 
clip_initial_reward=1.0, scaled_reward=0.0 + [(1-0.0)/(1.0-0.0)]*(0.7-0) = 0.7 + # Response 1: length=20, initial_reward=0.5, clip_initial_reward=0.5, scaled_reward=0.0 + [(0.5-0.0)/(1.0-0.0)]*(0.7-0) = 0.35 + # Response 2: length=30, initial_reward=0.8, clip_initial_reward=0.8, scaled_reward=0.0 + [(0.8-0.0)/(1.0-0.0)]*(0.7-0) = 0.56 + + expected_rewards = torch.tensor([0.7, 0.35, 0.56]) + assert torch.allclose(result_batch["total_reward"], expected_rewards) + assert result_batch is batch # Should return the same batch object + + +def test_reward_scaling_dapo(): + """Test that verifies binary rewards 0/1 are scaled to -1.0/1.0 respectively used in DAPO algorithm.""" + batch = create_mock_batch_with_responses( + num_samples=5, + response_lengths=[10, 20, 30, 40, 50], + initial_rewards=[1.0, 0.0, 0.0, 1.0, 0.0], + ) + + config = RewardScalingConfig( + enabled=True, source_min=0.0, source_max=1.0, target_min=-1.0, target_max=1.0 + ) + + result_batch = scale_rewards(batch, config) + expected_rewards = torch.tensor([1.0, -1.0, -1.0, 1.0, -1.0]) + + assert torch.allclose(result_batch["total_reward"], expected_rewards) + assert result_batch is batch # Should return the same batch object + + +def test_reward_scaling_clipping(): + """Test that verifies the out-of-range rewards are clipped and scaled to the target range.""" + batch = create_mock_batch_with_responses( + num_samples=6, + response_lengths=[10, 20, 30, 40, 50, 60], + initial_rewards=[-2.8, -0.25, 1.5, 0.5, 2.0, 2.5], + ) + + config = RewardScalingConfig( + enabled=True, source_min=-2.0, source_max=2.0, target_min=-1.0, target_max=1.0 + ) + + result_batch = scale_rewards(batch, config) + # Calculate expected rewards manually + # Response 0: initial_reward=-2.8, clip_initial_reward=-2.0, scaled_reward=-1.0 + [(-2.0-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = -1.0 + # Response 1: initial_reward=-0.25, clip_initial_reward=-0.25, scaled_reward=-1.0 + [(-0.25-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = -0.125 + # Response 2: 
initial_reward=1.5, clip_initial_reward=1.5, scaled_reward=-1.0 + [(1.5-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = 0.75 + # Response 3: initial_reward=0.5, clip_initial_reward=0.5, scaled_reward=-1.0 + [(0.5-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = 0.25 + # Response 4: initial_reward=2.0, clip_initial_reward=2.0, scaled_reward=-1.0 + [(2.0-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = 1.0 + # Response 5: initial_reward=2.5, clip_initial_reward=2.0, scaled_reward=-1.0 + [(2.0-(-2.0))/(2.0-(-2.0))]*(1.0-(-1.0)) = 1.0 + + expected_rewards = torch.tensor([-1.0, -0.125, 0.75, 0.25, 1.0, 1.0]) + + assert torch.allclose(result_batch["total_reward"], expected_rewards) + assert result_batch is batch # Should return the same batch object + + +def test_reward_shaping_disabled(): + """Test that when reward shaping is disabled, rewards remain unchanged.""" + # Create batch with various response lengths + batch = create_mock_batch_with_responses( + num_samples=3, response_lengths=[10, 20, 30], initial_rewards=[1.0, 0.5, 0.8] + ) + + original_rewards = batch["total_reward"].clone() + + # Disabled reward shaping config + config = RewardShapingConfig( + enabled=False, + overlong_buffer_length=5, + overlong_buffer_penalty=0.1, + max_response_length=25, + ) + + # Apply reward shaping + result_batch = apply_reward_shaping(batch, config) + + # Rewards should remain unchanged + assert torch.allclose(result_batch["total_reward"], original_rewards) + assert result_batch is batch # Should return the same batch object + + +def test_reward_shaping_no_penalties(): + """Test reward shaping when all responses are within acceptable length.""" + # Create batch where all responses are shorter than expected length + batch = create_mock_batch_with_responses( + num_samples=3, + response_lengths=[10, 15, 18], # All <= 20 (expected_response_length) + initial_rewards=[1.0, 0.5, 0.8], + ) + + original_rewards = batch["total_reward"].clone() + + # Config: max_response_length=25, overlong_buffer_length=5 -> 
expected_response_length=20 + config = RewardShapingConfig( + enabled=True, + overlong_buffer_length=5, + overlong_buffer_penalty=1.0, + max_response_length=25, + ) + + # Apply reward shaping + result_batch = apply_reward_shaping(batch, config) + + # Since no responses exceed expected length, rewards should remain unchanged + assert torch.allclose(result_batch["total_reward"], original_rewards) + + +def test_reward_shaping_with_penalties(): + """Test reward shaping when responses exceed expected length and receive penalties.""" + # Create batch with responses of varying lengths + batch = create_mock_batch_with_responses( + num_samples=4, + response_lengths=[10, 22, 25, 30], # expected_response_length = 20 + initial_rewards=[1.0, 0.8, 0.6, 0.4], + ) + + # Config: max_response_length=25, overlong_buffer_length=5 -> expected_response_length=20 + config = RewardShapingConfig( + enabled=True, + overlong_buffer_length=5, + overlong_buffer_penalty=0.5, + max_response_length=25, + ) + + # Apply reward shaping + result_batch = apply_reward_shaping(batch, config) + + # Calculate expected rewards manually + # Response 0: length=10, exceed_length=10-20=-10 (no penalty, reward stays 1.0) + # Response 1: length=22, exceed_length=22-20=2, penalty=min(-2/5*0.5, 0)=-0.2, reward=0.8-0.2=0.6 + # Response 2: length=25, exceed_length=25-20=5, penalty=min(-5/5*0.5, 0)=-0.5, reward=0.6-0.5=0.1 + # Response 3: length=30, exceed_length=30-20=10, penalty=min(-10/5*0.5, 0)=-1.0, reward=0.4-1.0=-0.6 + + expected_rewards = torch.tensor([1.0, 0.6, 0.1, -0.6]) + assert torch.allclose(result_batch["total_reward"], expected_rewards, atol=1e-6) + + +def test_reward_shaping_missing_config_values(): + """Test that missing required config values raise ValueError.""" + batch = create_mock_batch_with_responses( + num_samples=1, response_lengths=[20], initial_rewards=[1.0] + ) + + # Test missing overlong_buffer_length + config = RewardShapingConfig( + enabled=True, + overlong_buffer_length=None, + 
overlong_buffer_penalty=0.1, + max_response_length=25, + ) + + with pytest.raises(ValueError, match="DAPO reward shaping is currently supported"): + apply_reward_shaping(batch, config) + + # Test missing overlong_buffer_penalty + config["overlong_buffer_length"] = 5 + config["overlong_buffer_penalty"] = None + + with pytest.raises(ValueError, match="DAPO reward shaping is currently supported"): + apply_reward_shaping(batch, config) + + # Test missing max_response_length + config["overlong_buffer_penalty"] = 0.1 + config["max_response_length"] = None + + with pytest.raises(ValueError, match="DAPO reward shaping is currently supported"): + apply_reward_shaping(batch, config) + + +def test_reward_shaping_missing_assistant_response(): + """Test that missing assistant response raises assertion error.""" + # Create a batch with only user messages (no assistant responses) + message_logs = [ + [{"role": "user", "content": "Question", "token_ids": torch.tensor([1, 2, 3])}] + ] + + batch = BatchedDataDict[DatumSpec]( + { + "task_name": ["math"], + "message_log": message_logs, + "extra_env_info": [{}], + "loss_multiplier": torch.ones(1), + "total_reward": torch.tensor([1.0]), + } + ) + + config = RewardShapingConfig( + enabled=True, + overlong_buffer_length=5, + overlong_buffer_penalty=0.1, + max_response_length=25, + ) + + with pytest.raises( + AssertionError, match="Assistant response not found during reward shaping" + ): + apply_reward_shaping(batch, config) + + +def test_reward_shaping_mismatched_lengths(): + """Test that mismatched message_log and rewards lengths raise assertion error.""" + # Create batch with mismatched lengths + batch = create_mock_batch_with_responses( + num_samples=2, response_lengths=[10, 20], initial_rewards=[1.0, 0.5] + ) + + # Manually add an extra reward to create mismatch + batch["total_reward"] = torch.tensor( + [1.0, 0.5, 0.3] + ) # 3 rewards but 2 message_logs + + config = RewardShapingConfig( + enabled=True, + overlong_buffer_length=5, + 
overlong_buffer_penalty=0.1, + max_response_length=25, + ) + + with pytest.raises( + AssertionError, + match="The number of messages in the batch must match the number of rewards", + ): + apply_reward_shaping(batch, config) diff --git a/tests/unit/algorithms/test_rm.py b/tests/unit/algorithms/test_rm.py new file mode 100644 index 0000000000..8dabbedcb3 --- /dev/null +++ b/tests/unit/algorithms/test_rm.py @@ -0,0 +1,229 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import MagicMock, patch + +import pytest +import torch +from torchdata.stateful_dataloader import StatefulDataLoader + +from nemo_rl.algorithms.loss_functions import PreferenceLoss +from nemo_rl.algorithms.rm import _default_rm_save_state, rm_train + + +@pytest.fixture +def mock_components(): + # Create mock components + policy = MagicMock() + policy.train.return_value = { + "loss": torch.tensor(0.5), + "grad_norm": torch.tensor(1.0), + "all_mb_metrics": { + "loss": [0.5], + "accuracy": [1.0], + "rewards_chosen_mean": [4.5], + "rewards_rejected_mean": [3.5], + "num_valid_samples": [1.0], + "global_valid_toks": [10], + }, + } + + # Create a proper message log structure with token_ids + mock_batch = { + "message_log": [ + [ # chosen + {"role": "user", "token_ids": torch.tensor([1, 2, 3])}, + {"role": "assistant", "token_ids": torch.tensor([4, 5, 6])}, + ], + [ # rejected + {"role": "user", "token_ids": torch.tensor([1, 2, 3])}, + {"role": "assistant", "token_ids": torch.tensor([7, 8, 9, 10, 11])}, + ], + ], + "length": torch.tensor([6, 8]), + "loss_multiplier": torch.tensor([1.0, 1.0]), + } + + # Create mock dataloader with 10 batches that can be iterated multiple times + train_dataloader = MagicMock(spec=StatefulDataLoader) + + def train_iter(self): + return iter([mock_batch] * 10) + + train_dataloader.__iter__ = train_iter + train_dataloader.__len__ = MagicMock(return_value=10) + + val_dataloader = MagicMock(spec=StatefulDataLoader) + + def val_iter(self): + return iter([mock_batch] * 10) + + val_dataloader.__iter__ = val_iter + val_dataloader.__len__ = MagicMock(return_value=10) + + tokenizer = MagicMock() + tokenizer.pad_token_id = 0 + + loss_fn = PreferenceLoss() + logger = MagicMock() + checkpointer = MagicMock() + rm_task_spec = MagicMock() + + # Create mock master config + master_config = { + "rm": { + "max_num_steps": 5, + "max_num_epochs": 2, + "val_period": 100, + "val_batches": 1, + "val_global_batch_size": 1, + 
"val_micro_batch_size": 1, + "val_at_start": False, + }, + "policy": { + "train_global_batch_size": 1, + "make_sequence_length_divisible_by": 1, + "reward_model_cfg": { + "enabled": True, + "reward_model_type": "bradley_terry", + }, + "train_micro_batch_size": 1, + }, + "checkpointing": { + "enabled": False, + "checkpoint_must_save_by": None, + "save_period": 10, + }, + "cluster": { + "num_nodes": 1, + "gpus_per_node": 2, + }, + } + + return { + "policy": policy, + "train_dataloader": train_dataloader, + "val_dataloader": val_dataloader, + "tokenizer": tokenizer, + "loss_fn": loss_fn, + "logger": logger, + "checkpointer": checkpointer, + "rm_task_spec": rm_task_spec, + "master_config": master_config, + } + + +def test_exit_on_max_steps(mock_components): + """Test that training loop exits when max_num_steps is reached""" + # Set max steps to 12, which is less than len(train_dataloader) * max_num_epochs + mock_components["master_config"]["rm"]["max_num_steps"] = 12 + + rm_save_state = _default_rm_save_state() + + # Run training + rm_train( + mock_components["policy"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["master_config"], + mock_components["logger"], + mock_components["rm_task_spec"], + mock_components["checkpointer"], + rm_save_state, + ) + + # Verify we only trained for 12 steps. 
+ assert mock_components["policy"].train.call_count == 12 + + +def test_exit_on_max_epochs(mock_components): + """Test that training loop exits when max_num_epochs is reached""" + # Set max epochs to 2 and max steps to a large number + mock_components["master_config"]["rm"]["max_num_epochs"] = 2 + mock_components["master_config"]["rm"]["max_num_steps"] = 100 + + rm_save_state = _default_rm_save_state() + + # Run training + rm_train( + mock_components["policy"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["master_config"], + mock_components["logger"], + mock_components["rm_task_spec"], + mock_components["checkpointer"], + rm_save_state, + ) + + # Verify we trained for exactly two epochs (20 batches). + assert mock_components["policy"].train.call_count == 20 + + +def test_exit_on_timeout(mock_components, capsys): + """Test that training loop exits when timeout is reached""" + # Set max steps and epochs to large numbers + mock_components["master_config"]["rm"]["max_num_steps"] = 100 + mock_components["master_config"]["rm"]["max_num_epochs"] = 10 + + rm_save_state = _default_rm_save_state() + + # Mock TimeoutChecker to return False for first 7 checks, then True (timeout) + with patch("nemo_rl.algorithms.rm.TimeoutChecker") as mock_timeout_class: + mock_timeout_instance = MagicMock() + # Create a side_effect that returns False 7 times, then True + check_results = [False] * 7 + [True] + mock_timeout_instance.check_save.side_effect = check_results + mock_timeout_class.return_value = mock_timeout_instance + + # Run training + rm_train( + mock_components["policy"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["master_config"], + mock_components["logger"], + mock_components["rm_task_spec"], + mock_components["checkpointer"], + rm_save_state, + ) + + # 
Verify training stopped at 8 steps (when check_save returned True) + assert mock_components["policy"].train.call_count == 8 + + # Verify the timeout message was printed and is near the end (not followed by more training) + captured = capsys.readouterr() + output_lines = captured.out.strip().split("\n") + + # Find the timeout message + timeout_line_idx = None + for i, line in enumerate(output_lines): + if "Timeout has been reached, stopping training early" in line: + timeout_line_idx = i + break + + assert timeout_line_idx is not None, "Timeout message not found in output" + + # Verify no new epoch started after timeout (which would indicate a bug where break was used instead of return) + remaining_lines = output_lines[timeout_line_idx:] + for line in remaining_lines: + assert "Epoch" not in line or "Epoch 1/10" in line, ( + f"Training continued to next epoch after timeout: {line}" + ) diff --git a/tests/unit/algorithms/test_sequence_packing_gradients.py b/tests/unit/algorithms/test_sequence_packing_gradients.py index 33d858fbe4..48b3500ff9 100644 --- a/tests/unit/algorithms/test_sequence_packing_gradients.py +++ b/tests/unit/algorithms/test_sequence_packing_gradients.py @@ -128,11 +128,16 @@ def test_sequence_packing_gradients(self): loss_config = { "reference_policy_kl_penalty": 0.1, + "reference_policy_kl_type": "k3", + "kl_input_clamp_value": 20.0, + "kl_output_clamp_value": 10.0, "ratio_clip_min": 0.2, "ratio_clip_max": 0.2, "ratio_clip_c": 3.0, "use_on_policy_kl_approximation": False, "use_importance_sampling_correction": False, + "truncated_importance_sampling_ratio": None, + "sequence_level_importance_ratios": False, "token_level_loss": True, } diff --git a/tests/unit/algorithms/test_sft.py b/tests/unit/algorithms/test_sft.py index 4b6d9ee2ce..e43630651e 100644 --- a/tests/unit/algorithms/test_sft.py +++ b/tests/unit/algorithms/test_sft.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest import torch @@ -29,7 +29,7 @@ def mock_components(): policy.train.return_value = { "loss": torch.tensor(0.5), "grad_norm": torch.tensor(1.0), - "all_mb_metrics": {}, + "all_mb_metrics": {"global_valid_toks": [10]}, } # Create a proper message log structure with token_ids @@ -78,7 +78,15 @@ def val_iter(self): "train_global_batch_size": 1, "make_sequence_length_divisible_by": 8, }, - "checkpointing": {"enabled": False}, + "checkpointing": { + "enabled": False, + "checkpoint_must_save_by": None, + "save_period": 10, + }, + "cluster": { + "num_nodes": 1, + "gpus_per_node": 2, + }, } return { @@ -143,3 +151,57 @@ def test_exit_on_max_epochs(mock_components): # Verify we trained for exactly two epochs (20 batches). assert mock_components["policy"].train.call_count == 20 + + +def test_exit_on_timeout(mock_components, capsys): + """Test that training loop exits when timeout is reached""" + # Set max steps and epochs to large numbers + mock_components["master_config"]["sft"]["max_num_steps"] = 100 + mock_components["master_config"]["sft"]["max_num_epochs"] = 10 + + sft_save_state = _default_sft_save_state() + + # Mock TimeoutChecker to return False for first 7 checks, then True (timeout) + with patch("nemo_rl.algorithms.sft.TimeoutChecker") as mock_timeout_class: + mock_timeout_instance = MagicMock() + # Create a side_effect that returns False 7 times, then True + check_results = [False] * 7 + [True] + mock_timeout_instance.check_save.side_effect = check_results + mock_timeout_class.return_value = mock_timeout_instance + + # Run training + sft_train( + mock_components["policy"], + mock_components["train_dataloader"], + mock_components["val_dataloader"], + mock_components["tokenizer"], + mock_components["loss_fn"], + mock_components["master_config"], + mock_components["logger"], + mock_components["sft_task_spec"], + mock_components["checkpointer"], + sft_save_state, + ) + + # 
Verify training stopped at 8 steps (when check_save returned True) + assert mock_components["policy"].train.call_count == 8 + + # Verify the timeout message was printed and is near the end (not followed by more training) + captured = capsys.readouterr() + output_lines = captured.out.strip().split("\n") + + # Find the timeout message + timeout_line_idx = None + for i, line in enumerate(output_lines): + if "Timeout has been reached, stopping training early" in line: + timeout_line_idx = i + break + + assert timeout_line_idx is not None, "Timeout message not found in output" + + # Verify no new epoch started after timeout (which would indicate a bug where break was used instead of return) + remaining_lines = output_lines[timeout_line_idx:] + for line in remaining_lines: + assert "Epoch" not in line or "Epoch 1/10" in line, ( + f"Training continued to next epoch after timeout: {line}" + ) diff --git a/tests/unit/algorithms/test_utils.py b/tests/unit/algorithms/test_utils.py index 2dd00a5eeb..edc8d0a812 100755 --- a/tests/unit/algorithms/test_utils.py +++ b/tests/unit/algorithms/test_utils.py @@ -12,12 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import math from datetime import datetime import pytest +import torch -from nemo_rl.algorithms.utils import get_tokenizer -from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.algorithms.utils import ( + calculate_baseline_and_std_per_prompt, + get_tokenizer, + maybe_pad_last_batch, + print_performance_metrics, +) +from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.distributed.batched_data_dict import BatchedDataDict @pytest.fixture @@ -130,3 +138,458 @@ def test_get_tokenizer_custom_jinja_template(conversation_messages): formatted = tokenizer.apply_chat_template(conversation_messages, tokenize=False) expected = get_format_with_simple_role_header(conversation_messages) assert formatted == expected + + +def test_maybe_pad_last_batch(): + """Test maybe_pad_last_batch function for various scenarios""" + # Test case 1: No padding needed + batch_size = 8 + dp_size = 2 + mbs = 2 + + batch = BatchedDataDict( + { + "input_ids": torch.randn(batch_size, 10), + "input_lengths": torch.randint(1, 10, (batch_size,)), + "sample_mask": torch.ones(batch_size), + "token_mask": torch.ones(batch_size, 10), + "reference_policy_logprobs": torch.randn(batch_size, 10), + } + ) + + result = maybe_pad_last_batch(batch, dp_size, mbs) + + # Should not be padded since 8 is divisible by (2 * 2) = 4 + assert result["input_ids"].shape[0] == batch_size + assert result["input_lengths"].shape[0] == batch_size + assert result["sample_mask"].shape[0] == batch_size + assert result["token_mask"].shape[0] == batch_size + assert result["reference_policy_logprobs"].shape[0] == batch_size + + # Test case 2: Padding needed + batch_size = 7 + dp_size = 2 + mbs = 2 + + batch = BatchedDataDict( + { + "input_ids": torch.randn(batch_size, 10), + "input_lengths": torch.randint(1, 10, (batch_size,)), + "sample_mask": torch.ones(batch_size), + "token_mask": torch.ones(batch_size, 10), + "reference_policy_logprobs": torch.randn(batch_size, 10), + } + ) + + 
result = maybe_pad_last_batch(batch, dp_size, mbs) + + # Should be padded to 8 (next multiple of 4) + expected_size = 8 + assert result["input_ids"].shape[0] == expected_size + assert result["input_lengths"].shape[0] == expected_size + assert result["sample_mask"].shape[0] == expected_size + assert result["token_mask"].shape[0] == expected_size + assert result["reference_policy_logprobs"].shape[0] == expected_size + + # Check that sample_mask padding is zeros + assert torch.allclose( + result["sample_mask"][-1], torch.zeros_like(batch["sample_mask"][-1]) + ) + + # Test case 3: Batch without optional fields + batch_size = 5 + dp_size = 3 + mbs = 2 + + batch = BatchedDataDict( + { + "input_ids": torch.randn(batch_size, 10), + "input_lengths": torch.randint(1, 10, (batch_size,)), + "sample_mask": torch.ones(batch_size), + } + ) + + result = maybe_pad_last_batch(batch, dp_size, mbs) + + # Should be padded to 6 (next multiple of 3 * 2 = 6) + expected_size = 6 + assert result["input_ids"].shape[0] == expected_size + assert result["input_lengths"].shape[0] == expected_size + assert result["sample_mask"].shape[0] == expected_size + assert "token_mask" not in result + assert "reference_policy_logprobs" not in result + + +# Performance Metrics Tests + + +def _base_master_config(colocated: bool): + return { + "cluster": {"num_nodes": 2, "gpus_per_node": 8}, + "policy": { + "generation": { + "colocated": { + "enabled": colocated, + "resources": {"num_nodes": 1, "gpus_per_node": 8}, + } + } + }, + "grpo": {"num_prompts_per_step": 8, "num_generations_per_prompt": 10}, + } + + +def test_sync_colocated_throughput_flops_and_imbalance(capsys): + master_config = _base_master_config(colocated=True) + + timing_metrics = { + "policy_and_reference_logprobs": 2.0, + "policy_training": 4.0, + "total_step_time": 10.0, + "generation": 5.0, + "weight_sync": 1.0, + } + + # total_num_gpus = 2 * 8 = 16 + # samples_per_step = 8 * 10 = 80 + metrics = { + "total_num_tokens": 8000.0, + 
"per_worker_token_counts": {0: 1000, 1: 2000, 2: 3000, 3: 4000}, + } + + # total_tflops = total_flops / policy_training / 1e12 = 1e15 / 4 / 1e12 = 250 + # per-rank TFLOPS message shows 31.25 TFLOPS per rank for 8 ranks + train_results = { + "total_flops": 1.0e15, + "num_ranks": 8, + "theoretical_tflops": 500.0, + } + + perf = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) + + # Validate key throughput metrics + assert math.isclose(perf["samples_per_sec_per_gpu"], 0.5, rel_tol=1e-6) + assert math.isclose(perf["tokens_per_sec_per_gpu"], 50.0, rel_tol=1e-6) + assert math.isclose( + perf["policy_training_tokens_per_sec_per_gpu"], 125.0, rel_tol=1e-6 + ) + assert math.isclose( + perf["policy_and_reference_logprobs_tokens_per_sec_per_gpu"], + 250.0, + rel_tol=1e-6, + ) + assert math.isclose( + perf["training_worker_group_tokens_per_sec_per_gpu"], + 8000.0 / 6.0 / 16.0, + rel_tol=1e-6, + ) + assert math.isclose( + perf["generation_tokens_per_sec_per_gpu"], 8000.0 / 5.0 / 16.0, rel_tol=1e-6 + ) + + # Group totals + assert math.isclose(perf["samples_per_sec"], 8.0, rel_tol=1e-6) + assert math.isclose(perf["tokens_per_sec"], 800.0, rel_tol=1e-6) + assert math.isclose( + perf["training_worker_group_tokens_per_sec"], 8000.0 / 6.0, rel_tol=1e-6 + ) + + # Imbalance metric from ratios [0.25, 0.5, 0.75, 1.0] + assert math.isclose(perf["average_token_imbalance"], 0.375, rel_tol=1e-6) + + # Verify selected console output snippets + out = capsys.readouterr().out + assert "Performance Metrics" in out + assert "Throughputs (per GPU)" in out + assert "Average Token Imbalance" in out + assert "Training FLOPS" in out + assert "Floating Point Utilization" in out + + +def test_async_non_colocated_idle_ratio_and_generation_time(capsys): + master_config = _base_master_config(colocated=False) + master_config["async_grpo"] = {"enabled": True} + + timing_metrics = { + "policy_and_reference_logprobs": 2.0, + "policy_training": 4.0, + "total_step_time": 
10.0, + "exposed_generation": 2.0, + "prepare_for_generation/total": 1.0, + } + + # total_num_gpus = 16, training_num_gpus = 8, generation_num_gpus = 8 + metrics = { + "total_num_tokens": 6050.0, + "per_worker_token_counts": [{0: 3000}, {1: 3050}], + } + + train_results = {} + + perf = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) + + # Throughput checks + assert math.isclose(perf["samples_per_sec_per_gpu"], 0.5, rel_tol=1e-6) + assert math.isclose( + perf["tokens_per_sec_per_gpu"], 6050.0 / 10.0 / 16.0, rel_tol=1e-6 + ) + assert math.isclose( + perf["policy_training_tokens_per_sec_per_gpu"], + 6050.0 / 4.0 / 8.0, + rel_tol=1e-6, + ) + assert math.isclose( + perf["policy_and_reference_logprobs_tokens_per_sec_per_gpu"], + 6050.0 / 2.0 / 8.0, + rel_tol=1e-6, + ) + assert math.isclose( + perf["training_worker_group_tokens_per_sec_per_gpu"], + 6050.0 / (4.0 + 2.0) / 8.0, + rel_tol=1e-6, + ) + # generation_time = 2 + 2 + 4 = 8.0, per-gpu = 6050 / 8.0 / 8.0 + assert math.isclose( + perf["generation_tokens_per_sec_per_gpu"], 6050.0 / 8.0 / 8.0, rel_tol=1e-6 + ) + + # Aggregated worker counts: {0: 3000, 1: 3050} -> imbalance = 0.05 + imbalance = ((3050 - 3000) / 3050) / 2 + assert math.isclose(perf["average_token_imbalance"], imbalance, rel_tol=1e-6) + + +def test_minimal_inputs_no_counts_no_flops(capsys): + master_config = _base_master_config(colocated=False) + + timing_metrics = { + "policy_and_reference_logprobs": 1.0, + "policy_training": 3.0, + "total_step_time": 8.0, + "exposed_generation": 0.2, + "prepare_for_generation/total": 0.5, + } + + metrics = { + "total_num_tokens": 1600.0, + # no per_worker_token_counts present + } + + train_results = {} + + perf = print_performance_metrics( + train_results, metrics, timing_metrics, master_config + ) + + # Core metrics exist + for k in [ + "samples_per_sec", + "tokens_per_sec", + "samples_per_sec_per_gpu", + "tokens_per_sec_per_gpu", + ]: + assert k in perf + + out = 
capsys.readouterr().out + assert "Throughputs (per GPU)" in out + + +# ============================================================================ +# Tests for calculate_baseline_and_std_per_prompt function +# ============================================================================ + + +def test_calculate_baseline_and_std_per_prompt_basic(): + """Test basic functionality of calculate_baseline_and_std_per_prompt.""" + # Create rewards for 2 prompts, each with 3 generations + rewards = torch.tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + valid_mask = torch.ones(6) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + expected_baseline = torch.tensor([2.5, 2.0, 1.5, 5.5, 5.0, 4.5]) + expected_std = torch.tensor( + [0.707107, 1.414214, 0.707107, 0.707107, 1.414214, 0.707107] + ) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_single_generation_per_prompt(): + """Test calculate_baseline_and_std_per_prompt when num_valid < 2 (single generation per prompt).""" + # Case where each prompt has only 1 generation (num_valid = 1 < 2) + rewards = torch.tensor([2.5, 4.0]) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + ] + ) + valid_mask = torch.ones(2) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + # When num_valid <= 1 (single generation per prompt), baseline equals reward + expected_baseline = torch.tensor([2.5, 4.0]) + expected_std = torch.tensor([0.0, 0.0]) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_identical_rewards(): + """Test 
calculate_baseline_and_std_per_prompt when all rewards for a prompt are identical.""" + # All generations for both prompts have the same reward + rewards = torch.tensor([3.0, 3.0, 3.0, 7.0, 7.0, 7.0]) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + valid_mask = torch.ones(6) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + expected_baseline = torch.tensor([3.0, 3.0, 3.0, 7.0, 7.0, 7.0]) + expected_std = torch.tensor([0.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_mixed_prompt_sizes(): + """Test calculate_baseline_and_std_per_prompt with different number of generations per prompt.""" + # Prompt 0 has 2 generations, Prompt 1 has 3 generations + rewards = torch.tensor([1.0, 2.0, 4.0, 5.0, 6.0]) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + valid_mask = torch.ones(5) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + expected_baseline = torch.tensor([2.0, 1.0, 5.5, 5.0, 4.5]) + expected_std = torch.tensor([0.0, 0.0, 0.707107, 1.414214, 0.707107]) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_empty_input(): + """Test calculate_baseline_and_std_per_prompt with empty tensors.""" + rewards = torch.tensor([]) + prompts = torch.empty(0, 3, dtype=torch.long) + valid_mask = torch.tensor([]) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + assert baseline.shape == torch.Size([0]) + assert std.shape == torch.Size([0]) + assert 
torch.equal(baseline, torch.tensor([])) + assert torch.equal(std, torch.tensor([])) + + +def test_calculate_baseline_and_std_per_prompt_nan_handling(): + """Test calculate_baseline_and_std_per_prompt handles valid_mask correctly with masked samples.""" + # Test that valid_mask properly excludes samples from baseline calculation + # Note: The function doesn't handle actual NaN values; it uses valid_mask to exclude samples + rewards = torch.tensor([1.0, 999.0, 3.0, 4.0, 5.0, 6.0]) # 999.0 should be ignored + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 (invalid sample) + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + # Mark the second sample as invalid + valid_mask = torch.tensor([1.0, 0.0, 1.0, 1.0, 1.0, 1.0]) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + expected_baseline = torch.tensor([3.0, 4.0, 1.0, 5.5, 5.0, 4.5]) + expected_std = torch.tensor([0.0, 0.0, 0.0, 0.707107, 1.414214, 0.707107]) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_cuda_compatibility(): + """Test calculate_baseline_and_std_per_prompt works with CUDA tensors if available.""" + if not torch.cuda.is_available(): + pytest.skip("CUDA not available") + + rewards = torch.tensor([1.0, 2.0, 3.0, 4.0]).cuda() + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ).cuda() + valid_mask = torch.ones(4).cuda() + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + # Verify results are on CUDA and have expected values + assert baseline.device.type == "cuda" + assert std.device.type == "cuda" + + expected_baseline = torch.tensor([2.0, 1.0, 4.0, 3.0]).cuda() + expected_std = torch.tensor([0.0, 0.0, 0.0, 0.0]).cuda() + + assert 
torch.allclose(baseline, expected_baseline, rtol=1e-5) + assert torch.allclose(std, expected_std, rtol=1e-5) + + +def test_calculate_baseline_and_std_per_prompt_numerical_precision(): + """Test calculate_baseline_and_std_per_prompt with edge case numerical values.""" + # Use very small and very large values + rewards = torch.tensor([1e-8, 2e-8, 3e-8, 1e8, 2e8, 3e8]) + prompts = torch.tensor( + [ + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [1, 2, 3], # prompt 0 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + [4, 5, 6], # prompt 1 + ] + ) + valid_mask = torch.ones(6) + + baseline, std = calculate_baseline_and_std_per_prompt(prompts, rewards, valid_mask) + + expected_baseline = torch.tensor([2.5e-8, 2e-8, 1.5e-8, 2.5e8, 2e8, 1.5e8]) + + assert torch.allclose(baseline, expected_baseline, rtol=1e-5) + # Std values should be finite and not NaN + assert torch.isfinite(std).all() + assert not torch.isnan(std).any() diff --git a/tests/unit/algorithms/utils.py b/tests/unit/algorithms/utils.py new file mode 100644 index 0000000000..37eefe28f3 --- /dev/null +++ b/tests/unit/algorithms/utils.py @@ -0,0 +1,75 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch + +from nemo_rl.data.interfaces import DatumSpec, LLMMessageLogType +from nemo_rl.distributed.batched_data_dict import BatchedDataDict + + +def create_mock_batch_with_responses( + num_samples: int, + response_lengths: list[int], + initial_rewards: list[float], + task_names: list[str] = None, +) -> BatchedDataDict[DatumSpec]: + """Helper function to create a mock batch with specified response lengths and initial rewards.""" + if task_names is None: + task_names = ["math"] * num_samples + + message_logs = [] + for i, length in enumerate(response_lengths): + # Create dummy token_ids for assistant response with specified length + assistant_tokens = torch.arange(length, dtype=torch.long) + user_tokens = torch.tensor([100, 101, 102], dtype=torch.long) + + message_log = [ + {"role": "user", "content": f"Question {i}", "token_ids": user_tokens}, + { + "role": "assistant", + "content": f"Response {i}", + "token_ids": assistant_tokens, + }, + ] + message_logs.append(message_log) + + return BatchedDataDict[DatumSpec]( + { + "task_name": task_names, + "message_log": message_logs, + "extra_env_info": [{} for _ in range(num_samples)], + "loss_multiplier": torch.ones(num_samples), + "total_reward": torch.tensor(initial_rewards), + } + ) + + +def create_mock_batch( + num_samples: int, + task_names: list[str], + message_logs: list[LLMMessageLogType], + extra_env_info: list[dict] = None, +) -> BatchedDataDict[DatumSpec]: + """Helper function to create a mock batch for testing.""" + if extra_env_info is None: + extra_env_info = [{} for _ in range(num_samples)] + + return BatchedDataDict[DatumSpec]( + { + "task_name": task_names, + "message_log": message_logs, + "extra_env_info": extra_env_info, + "loss_multiplier": torch.ones(num_samples), + } + ) diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 1346a1173d..ab3368185c 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -45,44 +45,104 @@ def pytest_addoption(parser): default=False, 
help="Run ONLY mcore tests (combine with --hf-gated to include mcore+hf_gated tests)", ) + parser.addoption( + "--automodel-only", + action="store_true", + default=False, + help="Run ONLY automodel tests", + ) + parser.addoption( + "--vllm-only", + action="store_true", + default=False, + help="Run ONLY vllm tests", + ) def pytest_collection_modifyitems(config, items): """Modify test collection to skip tests based on markers unless explicitly requested.""" run_hf_gated = config.getoption("--hf-gated") run_mcore_only = config.getoption("--mcore-only") + run_automodel_only = config.getoption("--automodel-only") + run_vllm_only = config.getoption("--vllm-only") + + # Check for mutually exclusive options + exclusive_options = [run_mcore_only, run_automodel_only, run_vllm_only] + if sum(exclusive_options) > 1: + raise ValueError( + "--mcore-only, --automodel-only, and --vllm-only are mutually exclusive" + ) + marker_expr = config.getoption("-m", default="") - # If user specified -m marker expressions, let pytest handle everything normally + # If user specified -m marker expressions, still prioritize run_first tests if marker_expr: + items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1) return - # Filter tests based on the desired configurations - new_items = [] + # Start with all items and apply filters sequentially + new_items = list(items) - if run_mcore_only and run_hf_gated: - # Configuration 4: Only mcore tests, including ones with hf_gated - new_items = [item for item in items if item.get_closest_marker("mcore")] - elif run_mcore_only: - # Configuration 3: Only mcore tests, excluding ones with hf_gated + # Filter by hf_gated marker + if not run_hf_gated: + # Exclude hf_gated tests unless explicitly requested new_items = [ - item - for item in items - if item.get_closest_marker("mcore") - and not item.get_closest_marker("hf_gated") + item for item in new_items if not item.get_closest_marker("hf_gated") ] - elif run_hf_gated: - # Configuration 
2: Default tests + hf_gated tests, excluding mcore - new_items = [item for item in items if not item.get_closest_marker("mcore")] + + # Filter by mcore marker + if run_mcore_only: + # Validate that megatron.core is available + try: + import megatron.core # noqa: F401 + except ImportError: + raise ImportError( + "Cannot run mcore tests: megatron.core is not available.\n" + "Please run tests with: uv run --extra mcore --group test pytest ..." + ) + # Include only mcore tests + new_items = [item for item in new_items if item.get_closest_marker("mcore")] + else: + # Exclude mcore tests by default + new_items = [item for item in new_items if not item.get_closest_marker("mcore")] + + # Filter by automodel marker + if run_automodel_only: + # Validate that nemo_automodel is available + try: + import nemo_automodel # noqa: F401 + except ImportError: + raise ImportError( + "Cannot run automodel tests: nemo_automodel is not available.\n" + "Please run tests with: uv run --extra automodel --group test pytest ..." + ) + # Include only automodel tests + new_items = [item for item in new_items if item.get_closest_marker("automodel")] else: - # Configuration 1: Default only - exclude both hf_gated and mcore + # Exclude automodel tests by default new_items = [ - item - for item in items - if not item.get_closest_marker("hf_gated") - and not item.get_closest_marker("mcore") + item for item in new_items if not item.get_closest_marker("automodel") ] + # Filter by vllm marker + if run_vllm_only: + # Validate that vllm is available + try: + import vllm # noqa: F401 + except ImportError: + raise ImportError( + "Cannot run vllm tests: vllm is not available.\n" + "Please run tests with: uv run --extra vllm --group test pytest ..." 
+ ) + # Include only vllm tests + new_items = [item for item in new_items if item.get_closest_marker("vllm")] + else: + # Exclude vllm tests by default + new_items = [item for item in new_items if not item.get_closest_marker("vllm")] + + # Ensure run_first tests are prioritized + new_items.sort(key=lambda item: 0 if item.get_closest_marker("run_first") else 1) + # Update the items list in-place items[:] = new_items @@ -576,3 +636,49 @@ def tiny_gemma3_model_path(): tokenizer.save_pretrained(model_path) del model, tokenizer yield model_path + + +def _build_tiny_nemotron5_h_checkpoint(model_path: str) -> None: + import shutil + + from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + + config = AutoConfig.from_pretrained( + "nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True + ) + config.hybrid_override_pattern = "M*-" + config.num_hidden_layers = 3 + config.intermediate_size = 32 + config.hidden_size = 256 + config.num_attention_heads = 8 + config.mamba_num_heads = 8 + config.num_key_value_heads = 8 + config.n_groups = 1 + + model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained( + "nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True + ) + + shutil.rmtree(model_path, ignore_errors=True) + model.save_pretrained(model_path) + tokenizer.save_pretrained(model_path) + + +@pytest.fixture(scope="session") +def tiny_nemotron5_h_model_path(): + """Fixture that returns a path to a tiny nemotron model with a dummy tokenizer. + + If the asset hasn't been prepared by the prepare script, skip the tests that require it. + """ + model_path = os.path.join( + TEST_ASSETS_DIR, "tiny_nemotron5_h_with_nemotron_tokenizer" + ) + + config_file = os.path.join(model_path, "config.json") + if not os.path.exists(config_file): + pytest.skip( + "Tiny Nemotron-H test asset not prepared. Run `uv run tests/unit/prepare_unit_test_assets.py` first." 
+ ) + + yield model_path diff --git a/tests/unit/data/datasets/test_eval_dataset.py b/tests/unit/data/datasets/test_eval_dataset.py new file mode 100644 index 0000000000..dc567d09d7 --- /dev/null +++ b/tests/unit/data/datasets/test_eval_dataset.py @@ -0,0 +1,116 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from transformers import AutoTokenizer + +from nemo_rl.data.datasets import load_eval_dataset + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_gpqa_dataset(): + # load the dataset + data_config = { + "dataset_name": "gpqa", + "prompt_file": None, + "system_prompt_file": None, + } + gpqa_dataset = load_eval_dataset(data_config) + + # load the tokenizer + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + + # check that the dataset is formatted correctly + for example in gpqa_dataset.rekeyed_ds.take(5): + assert "question" in example + assert "options" in example + assert "answer" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["question"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" + ) + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_math_dataset(): + # load the dataset + data_config = { + "dataset_name": "math", + "prompt_file": None, + "system_prompt_file": None, + } + math_dataset = load_eval_dataset(data_config) + + # load the tokenizer + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + + # check that the dataset is formatted correctly + for example in math_dataset.rekeyed_ds.take(5): + assert "problem" in example + assert "expected_answer" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["problem"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['problem']}<|im_end|>\n" + ) + + +@pytest.mark.skip(reason="dataset download is flaky") +def test_mmlu_dataset(): + # load the dataset + data_config = { + "dataset_name": "mmlu", + "prompt_file": None, + "system_prompt_file": None, + } + mmlu_dataset = load_eval_dataset(data_config) + + # load the tokenizer + tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") + + # check that the dataset is formatted correctly + for example in mmlu_dataset.rekeyed_ds.take(5): + assert "question" in example + assert "options" in example + assert "answer" in example + assert "subject" in example + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + [{"role": "user", "content": example["question"]}], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert ( + default_templated + == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" + ) diff --git a/tests/unit/data/datasets/test_helpsteer.py b/tests/unit/data/datasets/test_helpsteer.py new file mode 100644 index 0000000000..337a2b630d --- /dev/null +++ b/tests/unit/data/datasets/test_helpsteer.py @@ -0,0 +1,151 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest + +from nemo_rl.data.datasets.preference_datasets import HelpSteer3Dataset +from nemo_rl.data.datasets.preference_datasets.helpsteer3 import ( + to_preference_data_format, +) + + +@pytest.fixture(scope="module") +def helpsteer3_dataset(): + try: + dataset = HelpSteer3Dataset() + yield dataset + except Exception as e: + print(f"Error during loading HelpSteer3Dataset: {e}") + yield + + +def test_to_preference_data_format(): + """Test the `to_preference_data_format()` function with different preference values.""" + # Test case 1: response1 is preferred (overall_preference < 0) + data1 = { + "context": "What is 2+2?", + "response1": "The answer is 4.", + "response2": "I don't know.", + "overall_preference": -1, + } + result1 = to_preference_data_format(data1) + assert result1["context"] == [{"content": "What is 2+2?", "role": "user"}] + assert result1["completions"] == [ + { + "rank": 0, + "completion": [{"role": "assistant", "content": "The answer is 4."}], + }, + {"rank": 1, "completion": [{"role": "assistant", 
"content": "I don't know."}]}, + ] + + # Test case 2: response2 is preferred (overall_preference > 0) + data2 = { + "context": "What is the capital of France?", + "response1": "The capital of France is London.", + "response2": "The capital of France is Paris.", + "overall_preference": 1, + } + result2 = to_preference_data_format(data2) + assert result2["context"] == [ + {"content": "What is the capital of France?", "role": "user"} + ] + assert result2["completions"] == [ + { + "rank": 0, + "completion": [ + {"role": "assistant", "content": "The capital of France is Paris."} + ], + }, + { + "rank": 1, + "completion": [ + {"role": "assistant", "content": "The capital of France is London."} + ], + }, + ] + + # Test case 3: no preference (overall_preference = 0) + data3 = { + "context": "What is the weather like?", + "response1": "It's sunny today.", + "response2": "The weather is sunny.", + "overall_preference": 0, + } + result3 = to_preference_data_format(data3) + assert result3["context"] == [ + {"content": "What is the weather like?", "role": "user"} + ] + # When preference is 0, neither response is preferred, so + # response 1 is used for both chosen and rejected + assert result3["completions"] == [ + { + "rank": 0, + "completion": [{"role": "assistant", "content": "It's sunny today."}], + }, + { + "rank": 1, + "completion": [{"role": "assistant", "content": "It's sunny today."}], + }, + ] + + # Test case 4: context is a list of dicts + data1 = { + "context": [ + {"role": "user", "content": "Can I ask you a question?"}, + {"role": "assistant", "content": "Sure, what do you want to know?"}, + {"role": "user", "content": "What is 2+2?"}, + ], + "response1": "4.", + "response2": "I don't know.", + "overall_preference": -1, + } + result1 = to_preference_data_format(data1) + assert result1["context"] == [ + {"role": "user", "content": "Can I ask you a question?"}, + {"role": "assistant", "content": "Sure, what do you want to know?"}, + {"role": "user", "content": "What 
is 2+2?"}, + ] + assert result1["completions"] == [ + {"rank": 0, "completion": [{"role": "assistant", "content": "4."}]}, + {"rank": 1, "completion": [{"role": "assistant", "content": "I don't know."}]}, + ] + + +def test_helpsteer3_dataset_initialization(helpsteer3_dataset): + """Test that HelpSteer3Dataset initializes correctly.""" + + dataset = helpsteer3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") + + # Verify dataset initialization + assert dataset.task_spec.task_name == "HelpSteer3" + + +def test_helpsteer3_dataset_data_format(helpsteer3_dataset): + """Test that HelpSteer3Dataset correctly formats the data.""" + + dataset = helpsteer3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") + + assert isinstance(dataset.formatted_ds, dict) + assert "train" in dataset.formatted_ds + assert "validation" in dataset.formatted_ds + + # Verify data format + sample = dataset.formatted_ds["train"][0] + assert "context" in sample + assert "completions" in sample diff --git a/tests/unit/data/hf_datasets/test_oai_format_dataset.py b/tests/unit/data/datasets/test_oai_format_dataset.py similarity index 96% rename from tests/unit/data/hf_datasets/test_oai_format_dataset.py rename to tests/unit/data/datasets/test_oai_format_dataset.py index ae6b878779..aad989ed15 100644 --- a/tests/unit/data/hf_datasets/test_oai_format_dataset.py +++ b/tests/unit/data/datasets/test_oai_format_dataset.py @@ -18,10 +18,8 @@ import pytest from transformers import AutoTokenizer -from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_rl.data.hf_datasets.oai_format_dataset import ( - OpenAIFormatDataset, -) +from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.data.datasets.response_datasets import OpenAIFormatDataset @pytest.fixture diff --git a/tests/unit/data/datasets/test_preference_dataset.py b/tests/unit/data/datasets/test_preference_dataset.py new file mode 100644 index 0000000000..c4633e08b8 
--- /dev/null +++ b/tests/unit/data/datasets/test_preference_dataset.py @@ -0,0 +1,240 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import tempfile + +import pytest + +from nemo_rl.data.datasets import load_preference_dataset + + +@pytest.fixture +def mock_preference_data(): + """Create temporary preference dataset files with sample data.""" + preference_data = [ + { + "context": [{"role": "user", "content": "What is 2+2?"}], + "completions": [ + { + "rank": 1, + "completion": [ + {"role": "assistant", "content": "The answer is 4."} + ], + }, + { + "rank": 2, + "completion": [{"role": "assistant", "content": "I don't know."}], + }, + ], + }, + { + "context": [{"role": "user", "content": "What is the capital of France?"}], + "completions": [ + { + "rank": 1, + "completion": [ + { + "role": "assistant", + "content": "The capital of France is Paris.", + } + ], + }, + { + "rank": 2, + "completion": [ + { + "role": "assistant", + "content": "The capital of France is London.", + } + ], + }, + ], + }, + ] + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as preference_file: + json.dump(preference_data, preference_file) + preference_path = preference_file.name + + try: + yield preference_path + finally: + # Cleanup + os.unlink(preference_path) + + +def test_preference_dataset_initialization(mock_preference_data): + """Test that PreferenceDataset 
initializes correctly with valid data files.""" + # Load the dataset + data_config = { + "dataset_name": "PreferenceDataset", + "train_data_path": mock_preference_data, + } + dataset = load_preference_dataset(data_config) + + # Verify dataset initialization + assert dataset.task_spec.task_name == "PreferenceDataset" + + # Verify formatted_ds structure + assert "train" in dataset.formatted_ds + assert len(dataset.formatted_ds["train"]) == 2 + + +def test_preference_dataset_data_format(mock_preference_data): + """Test that PreferenceDataset correctly loads and formats the data.""" + # Load the dataset + data_config = { + "dataset_name": "PreferenceDataset", + "train_data_path": mock_preference_data, + } + dataset = load_preference_dataset(data_config) + + # Verify data format + sample = dataset.formatted_ds["train"][0] + assert "context" in sample + assert "completions" in sample + + # Verify context structure + assert isinstance(sample["context"], list) + assert len(sample["context"]) == 1 + assert "role" in sample["context"][0] + assert "content" in sample["context"][0] + + # Verify completions structure + assert isinstance(sample["completions"], list) + assert len(sample["completions"]) == 2 + + for completion in sample["completions"]: + assert "rank" in completion + assert "completion" in completion + assert isinstance(completion["rank"], int) + assert isinstance(completion["completion"], list) + + +@pytest.fixture +def mock_binary_preference_data(): + """Create temporary chosen_rejected dataset files with sample data.""" + train_data = [ + { + "prompt": "What is 2+2?", + "chosen_response": "The answer is 4.", + "rejected_response": "I don't know.", + }, + { + "prompt": "What is the capital of France?", + "chosen_response": "The capital of France is Paris.", + "rejected_response": "The capital of France is London.", + }, + ] + + val_data = [ + { + "prompt": "What is 3*3?", + "chosen_response": "The answer is 9.", + "rejected_response": "The answer is 6.", + } + ] 
+ + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as train_file: + json.dump(train_data, train_file) + train_path = train_file.name + + with tempfile.NamedTemporaryFile( + mode="w", suffix=".json", delete=False + ) as val_file: + json.dump(val_data, val_file) + val_path = val_file.name + + try: + yield train_path, val_path + finally: + # Cleanup + os.unlink(train_path) + os.unlink(val_path) + + +def test_binary_preference_dataset_initialization(mock_binary_preference_data): + """Test that PreferenceDataset initializes correctly with valid data files.""" + # Load the dataset + train_path, val_path = mock_binary_preference_data + data_config = { + "dataset_name": "BinaryPreferenceDataset", + "train_data_path": train_path, + "val_data_path": val_path, + "prompt_key": "prompt", + "chosen_key": "chosen_response", + "rejected_key": "rejected_response", + } + dataset = load_preference_dataset(data_config) + + # Verify dataset initialization + assert dataset.task_spec.task_name == "BinaryPreferenceDataset" + + # Verify formatted_ds structure + assert "train" in dataset.formatted_ds + assert "validation" in dataset.formatted_ds + + assert len(dataset.formatted_ds["train"]) == 2 + assert len(dataset.formatted_ds["validation"]) == 1 + + +def test_binary_preference_dataset_invalid_files(): + """Test that PreferenceDataset raises appropriate errors with invalid files.""" + with pytest.raises(FileNotFoundError): + data_config = { + "dataset_name": "BinaryPreferenceDataset", + "train_data_path": "nonexistent.json", + "val_data_path": "nonexistent.json", + "prompt_key": "prompt", + "chosen_key": "chosen_response", + "rejected_key": "rejected_response", + } + load_preference_dataset(data_config) + + +def test_binary_preference_dataset_data_format(mock_binary_preference_data): + """Test that PreferenceDataset correctly formats the data.""" + # Load the dataset + train_path, val_path = mock_binary_preference_data + data_config = { + "dataset_name": 
"BinaryPreferenceDataset", + "train_data_path": train_path, + "val_data_path": val_path, + "prompt_key": "prompt", + "chosen_key": "chosen_response", + "rejected_key": "rejected_response", + } + dataset = load_preference_dataset(data_config) + + # Verify data format + train_sample = dataset.formatted_ds["train"][0] + assert "context" in train_sample + assert "completions" in train_sample + + # Verify data content + print(train_sample["completions"]) + assert train_sample["context"] == [{"content": "What is 2+2?", "role": "user"}] + assert train_sample["completions"] == [ + { + "completion": [{"content": "The answer is 4.", "role": "assistant"}], + "rank": 0, + }, + {"completion": [{"content": "I don't know.", "role": "assistant"}], "rank": 1}, + ] diff --git a/tests/unit/data/datasets/test_preserving_dataset.py b/tests/unit/data/datasets/test_preserving_dataset.py new file mode 100644 index 0000000000..9c16a6ffeb --- /dev/null +++ b/tests/unit/data/datasets/test_preserving_dataset.py @@ -0,0 +1,315 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import tempfile + +import pytest +from datasets import Dataset + +from nemo_rl.data.datasets.response_datasets.oai_format_dataset import ( + PreservingDataset, +) + + +class TestPreservingDataset: + """Test suite for PreservingDataset class.""" + + def test_no_none_filling(self): + """Test that PreservingDataset doesn't add None values for missing keys.""" + # Create data with heterogeneous structure + data = [ + {"role": "user", "content": "Hello", "extra_key": "value1"}, + {"role": "assistant", "content": "Hi"}, # Missing 'extra_key' + {"role": "user", "content": "How are you?", "another_key": "value2"}, + ] + + dataset = PreservingDataset(data) + + # Check that missing keys are not filled with None + assert "extra_key" not in dataset[1] + assert "another_key" not in dataset[0] + assert "another_key" not in dataset[1] + + # Verify original structure is preserved + assert dataset[0]["extra_key"] == "value1" + assert dataset[2]["another_key"] == "value2" + + def test_indexing_operations(self): + """Test various indexing operations.""" + data = [{"id": i, "value": f"item_{i}"} for i in range(5)] + dataset = PreservingDataset(data) + + # Test integer indexing + assert dataset[0]["id"] == 0 + assert dataset[2]["value"] == "item_2" + + # Test negative indexing + assert dataset[-1]["id"] == 4 + assert dataset[-2]["value"] == "item_3" + + # Test slicing + sliced = dataset[1:3] + assert len(sliced) == 2 + assert sliced[0]["id"] == 1 + assert sliced[1]["id"] == 2 + + # Test list indexing + selected = dataset[[0, 2, 4]] + assert len(selected) == 3 + assert selected[0]["id"] == 0 + assert selected[1]["id"] == 2 + assert selected[2]["id"] == 4 + + # Test out of range + with pytest.raises(IndexError): + _ = dataset[10] + + def test_map_function(self): + """Test the map function preserves structure.""" + data = [ + {"id": 1, "value": 10}, + {"id": 2, "value": 20, "extra": "data"}, + ] + dataset = PreservingDataset(data) + + # Map without indices + def 
double_value(item): + item = item.copy() + item["value"] *= 2 + return item + + mapped = dataset.map(double_value) + assert mapped[0]["value"] == 20 + assert mapped[1]["value"] == 40 + assert "extra" not in mapped[0] # Still no extra key + assert mapped[1]["extra"] == "data" # Extra key preserved + + # Map with indices + def add_index(item, idx): + item = item.copy() + item["index"] = idx + return item + + indexed = dataset.map(add_index, with_indices=True) + assert indexed[0]["index"] == 0 + assert indexed[1]["index"] == 1 + + def test_iteration(self): + """Test iteration over dataset.""" + data = [{"id": i} for i in range(3)] + dataset = PreservingDataset(data) + + items = list(dataset) + assert len(items) == 3 + for i, item in enumerate(dataset): + assert item["id"] == i + + def test_length(self): + """Test len() operation.""" + dataset = PreservingDataset([]) + assert len(dataset) == 0 + + dataset = PreservingDataset([{"a": 1}, {"b": 2}]) + assert len(dataset) == 2 + + +class TestOpenAIFormatDatasetWithHeterogeneousTools: + """Test OpenAIFormatDataset with heterogeneous tool calls.""" + + @pytest.fixture + def heterogeneous_data(self): + """Create test data with varying tool call structures.""" + train_data = [ + { + "messages": [ + {"role": "user", "content": "Check the workspace and write a file"}, + { + "role": "assistant", + "content": "Let me look at the workspace first", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "view_file", + "arguments": { + "path": "/workspace", + "line_start": 1, + }, + }, + } + ], + }, + { + "role": "tool", + "content": "workspace contents...", + "tool_call_id": "call_1", + }, + { + "role": "assistant", + "content": "Now writing the file", + "tool_calls": [ + { + "id": "call_2", + "type": "function", + "function": { + "name": "write_file", + # Different argument structure - has 'content' and 'mode' that view_file doesn't + "arguments": { + "path": "test.py", + "content": "print('hello')", 
+ "mode": "w", + }, + }, + } + ], + }, + ] + }, + { + "messages": [ + {"role": "user", "content": "Search for something"}, + { + "role": "assistant", + "content": "Searching", + "tool_calls": [ + { + "id": "call_3", + "type": "function", + "function": { + "name": "search", + # Yet another different structure + "arguments": { + "query": "test", + "max_results": 10, + "filter": "*.py", + }, + }, + } + ], + }, + ] + }, + ] + + val_data = [ + { + "messages": [ + {"role": "user", "content": "Delete a file"}, + { + "role": "assistant", + "content": "Deleting", + "tool_calls": [ + { + "id": "call_4", + "type": "function", + "function": { + "name": "delete_file", + # Simple structure with just path + "arguments": {"path": "old.txt"}, + }, + } + ], + }, + ] + } + ] + + # Write to temporary files + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + for item in train_data: + json.dump(item, f) + f.write("\n") + train_path = f.name + + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f: + for item in val_data: + json.dump(item, f) + f.write("\n") + val_path = f.name + + return train_path, val_path, train_data, val_data + + def test_preserves_tool_structure_without_none(self, heterogeneous_data): + """Test that heterogeneous tool calls are handled correctly. + Note: This test verifies the PreservingDataset behavior when it's triggered. + In this test case, the standard loading may succeed, so we test the class directly. 
+ """ + train_path, val_path, original_train, original_val = heterogeneous_data + + # Test PreservingDataset directly to verify its behavior + from nemo_rl.data.datasets.response_datasets.oai_format_dataset import ( + PreservingDataset, + ) + + # Simulate what happens in the exception handler + with open(train_path, "r") as f: + train_data = [json.loads(line) for line in f] + + # Create PreservingDataset + preserving_dataset = PreservingDataset(train_data) + + # Verify no None-filling occurs + sample_0 = preserving_dataset[0] + sample_1 = preserving_dataset[1] + + # First sample has two assistant messages with different tool structures + # First assistant message - view_file + assert "tool_calls" in sample_0["messages"][1] + view_args = sample_0["messages"][1]["tool_calls"][0]["function"]["arguments"] + assert "path" in view_args + assert "line_start" in view_args + # These keys should NOT exist (not filled with None) + assert "content" not in view_args + assert "mode" not in view_args + assert "query" not in view_args + + # Second assistant message in same sample - write_file + assert "tool_calls" in sample_0["messages"][3] + write_args = sample_0["messages"][3]["tool_calls"][0]["function"]["arguments"] + assert "path" in write_args + assert "content" in write_args + assert "mode" in write_args + # These keys should NOT exist + assert "line_start" not in write_args + assert "query" not in write_args + + # Second sample - search with different structure + assert "tool_calls" in sample_1["messages"][1] + search_args = sample_1["messages"][1]["tool_calls"][0]["function"]["arguments"] + assert "query" in search_args + assert "max_results" in search_args + assert "filter" in search_args + # These keys should NOT exist + assert "path" not in search_args + assert "content" not in search_args + + def test_comparison_with_standard_dataset(self): + """Compare behavior with standard HuggingFace Dataset to show the difference.""" + # Data with heterogeneous structure + data = 
[ + {"role": "user", "content": "Hello", "tool_id": "123"}, + {"role": "assistant", "content": "Hi"}, # Missing tool_id + ] + + # Standard HuggingFace Dataset adds None + hf_dataset = Dataset.from_list(data) + assert hf_dataset[0]["tool_id"] == "123" + assert hf_dataset[1]["tool_id"] is None # HF adds None + + # PreservingDataset doesn't add None + preserving_dataset = PreservingDataset(data) + assert preserving_dataset[0]["tool_id"] == "123" + assert "tool_id" not in preserving_dataset[1] # Key doesn't exist diff --git a/tests/unit/data/hf_datasets/test_prompt_response.py b/tests/unit/data/datasets/test_response_dataset.py similarity index 54% rename from tests/unit/data/hf_datasets/test_prompt_response.py rename to tests/unit/data/datasets/test_response_dataset.py index cbf18977a4..e8c935da4f 100644 --- a/tests/unit/data/hf_datasets/test_prompt_response.py +++ b/tests/unit/data/datasets/test_response_dataset.py @@ -18,10 +18,8 @@ import pytest from transformers import AutoTokenizer -from nemo_rl.data.hf_datasets.chat_templates import COMMON_CHAT_TEMPLATES -from nemo_rl.data.hf_datasets.prompt_response_dataset import ( - PromptResponseDataset, -) +from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES +from nemo_rl.data.datasets import load_response_dataset @pytest.fixture @@ -56,8 +54,14 @@ def sample_data(request): @pytest.mark.parametrize("sample_data", [("input", "output")], indirect=True) def test_dataset_initialization(sample_data): + # load the dataset train_path, val_path = sample_data - dataset = PromptResponseDataset(train_path, val_path) + data_config = { + "dataset_name": "ResponseDataset", + "train_data_path": train_path, + "val_data_path": val_path, + } + dataset = load_response_dataset(data_config) assert dataset.input_key == "input" assert dataset.output_key == "output" @@ -67,10 +71,16 @@ def test_dataset_initialization(sample_data): @pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True) def 
test_custom_keys(sample_data): + # load the dataset train_path, val_path = sample_data - dataset = PromptResponseDataset( - train_path, val_path, input_key="question", output_key="answer" - ) + data_config = { + "dataset_name": "ResponseDataset", + "train_data_path": train_path, + "val_data_path": val_path, + "input_key": "question", + "output_key": "answer", + } + dataset = load_response_dataset(data_config) assert dataset.input_key == "question" assert dataset.output_key == "answer" @@ -79,10 +89,16 @@ def test_custom_keys(sample_data): @pytest.mark.hf_gated @pytest.mark.parametrize("sample_data", [("question", "answer")], indirect=True) def test_message_formatting(sample_data): + # load the dataset train_path, val_path = sample_data - dataset = PromptResponseDataset( - train_path, val_path, input_key="question", output_key="answer" - ) + data_config = { + "dataset_name": "ResponseDataset", + "train_data_path": train_path, + "val_data_path": val_path, + "input_key": "question", + "output_key": "answer", + } + dataset = load_response_dataset(data_config) first_example = dataset.formatted_ds["train"][0] @@ -105,3 +121,47 @@ def test_message_formatting(sample_data): assert combined_message == "".join( message["content"] for message in first_example["messages"] ) + + +@pytest.mark.hf_gated +@pytest.mark.skip(reason="dataset download is flaky") +def test_squad_dataset(): + # load the dataset + data_config = { + "dataset_name": "squad", + "prompt_file": None, + "system_prompt_file": None, + } + squad_dataset = load_response_dataset(data_config) + + # load the tokenizer + tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") + + # check that the dataset is formatted correctly + for example in squad_dataset.formatted_ds["train"].take(5): + assert "messages" in example + assert len(example["messages"]) == 3 + + assert example["messages"][0]["role"] == "system" + assert example["messages"][1]["role"] == "user" + assert 
example["messages"][2]["role"] == "assistant" + + template = "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" + + ## check that applying chat template works as expected + default_templated = tokenizer.apply_chat_template( + example["messages"], + chat_template=template, + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + ) + + assert default_templated == ( + "Context: " + + example["messages"][0]["content"] + + " Question: " + + example["messages"][1]["content"] + + " Answer: " + + example["messages"][2]["content"] + ) diff --git a/tests/unit/data/datasets/test_tulu3.py b/tests/unit/data/datasets/test_tulu3.py new file mode 100644 index 0000000000..2379bee646 --- /dev/null +++ b/tests/unit/data/datasets/test_tulu3.py @@ -0,0 +1,81 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pytest + +from nemo_rl.data.datasets.preference_datasets import Tulu3PreferenceDataset +from nemo_rl.data.datasets.preference_datasets.tulu3 import to_preference_data_format + + +@pytest.fixture(scope="module") +def tulu3_dataset(): + try: + dataset = Tulu3PreferenceDataset() + yield dataset + except Exception as e: + print(f"Error during loading Tulu3PreferenceDataset: {e}") + yield + + +def test_to_preference_data_format(): + """Test the `to_preference_data_format()` function with different preference values.""" + data = { + "prompt": "What is 2+2?", + "chosen": [ + {"content": "What is 2+2?", "role": "user"}, + {"role": "assistant", "content": "The answer is 4."}, + ], + "rejected": [ + {"content": "What is 2+2?", "role": "user"}, + {"role": "assistant", "content": "I don't know."}, + ], + } + result = to_preference_data_format(data) + assert result["context"] == [{"content": "What is 2+2?", "role": "user"}] + assert result["completions"] == [ + { + "rank": 0, + "completion": [{"role": "assistant", "content": "The answer is 4."}], + }, + {"rank": 1, "completion": [{"role": "assistant", "content": "I don't know."}]}, + ] + + +def test_tulu3_dataset_initialization(tulu3_dataset): + """Test that Tulu3PreferenceDataset initializes correctly.""" + + dataset = tulu3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") + + # Verify dataset initialization + assert dataset.task_spec.task_name == "Tulu3Preference" + + +def test_tulu3_dataset_data_format(tulu3_dataset): + """Test that Tulu3PreferenceDataset correctly formats the data.""" + + dataset = tulu3_dataset + if dataset is None: + pytest.skip("dataset download is flaky") + + assert isinstance(dataset.formatted_ds, dict) + assert "train" in dataset.formatted_ds + + # Verify data format + sample = dataset.formatted_ds["train"][0] + assert "prompt" in sample + assert "chosen" in sample + assert "rejected" in sample diff --git a/tests/unit/data/eval_datasets/test_gpqa.py 
b/tests/unit/data/eval_datasets/test_gpqa.py deleted file mode 100644 index 3441f11974..0000000000 --- a/tests/unit/data/eval_datasets/test_gpqa.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest -from transformers import AutoTokenizer - -from nemo_rl.data.eval_datasets.gpqa import GPQADataset - - -@pytest.mark.skip(reason="dataset download is flaky") -def test_gpqa_dataset(): - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") - gpqa_dataset = GPQADataset() - - # check that the dataset is formatted correctly - for example in gpqa_dataset.rekeyed_ds.take(5): - assert "question" in example - assert "options" in example - assert "answer" in example - - ## check that applying chat template works as expected - default_templated = tokenizer.apply_chat_template( - [{"role": "user", "content": example["question"]}], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - - assert ( - default_templated - == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" - ) diff --git a/tests/unit/data/eval_datasets/test_math.py b/tests/unit/data/eval_datasets/test_math.py deleted file mode 100644 index 3bab184f1a..0000000000 --- a/tests/unit/data/eval_datasets/test_math.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest -from transformers import AutoTokenizer - -from nemo_rl.data.eval_datasets.math import MathDataset - - -@pytest.mark.skip(reason="dataset download is flaky") -def test_math_dataset(): - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") - math_dataset = MathDataset() - - # check that the dataset is formatted correctly - for example in math_dataset.rekeyed_ds.take(5): - assert "problem" in example - assert "expected_answer" in example - - ## check that applying chat template works as expected - default_templated = tokenizer.apply_chat_template( - [{"role": "user", "content": example["problem"]}], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - - assert ( - default_templated - == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['problem']}<|im_end|>\n" - ) diff --git a/tests/unit/data/eval_datasets/test_mmlu.py b/tests/unit/data/eval_datasets/test_mmlu.py deleted file mode 100644 index 02c1936003..0000000000 --- a/tests/unit/data/eval_datasets/test_mmlu.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest -from transformers import AutoTokenizer - -from nemo_rl.data.eval_datasets.mmlu import MMLUDataset - - -@pytest.mark.skip(reason="dataset download is flaky") -def test_mmlu_dataset(): - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct") - mmlu_dataset = MMLUDataset() - - # check that the dataset is formatted correctly - for example in mmlu_dataset.rekeyed_ds.take(5): - assert "question" in example - assert "options" in example - assert "answer" in example - assert "subject" in example - - ## check that applying chat template works as expected - default_templated = tokenizer.apply_chat_template( - [{"role": "user", "content": example["question"]}], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - - assert ( - default_templated - == f"<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\n<|im_start|>user\n{example['question']}<|im_end|>\n" - ) diff --git a/tests/unit/data/hf_datasets/test_dpo_dataset.py b/tests/unit/data/hf_datasets/test_dpo_dataset.py deleted file mode 100644 index ed13df2c99..0000000000 --- a/tests/unit/data/hf_datasets/test_dpo_dataset.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import json -import os -import tempfile - -import pytest - -from nemo_rl.data.hf_datasets.dpo import DPODataset - - -@pytest.fixture -def mock_dpo_data(): - """Create temporary DPO dataset files with sample data.""" - train_data = [ - { - "prompt": "What is 2+2?", - "chosen_response": "The answer is 4.", - "rejected_response": "I don't know.", - }, - { - "prompt": "What is the capital of France?", - "chosen_response": "The capital of France is Paris.", - "rejected_response": "The capital of France is London.", - }, - ] - - val_data = [ - { - "prompt": "What is 3*3?", - "chosen_response": "The answer is 9.", - "rejected_response": "The answer is 6.", - } - ] - - train_ctx = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) - val_ctx = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) - - with tempfile.NamedTemporaryFile( - mode="w", suffix=".json", delete=False - ) as train_file: - json.dump(train_data, train_file) - train_path = train_file.name - with tempfile.NamedTemporaryFile( 
- mode="w", suffix=".json", delete=False - ) as val_file: - json.dump(val_data, val_file) - val_path = val_file.name - yield train_path, val_path - # Cleanup - os.unlink(train_path) - os.unlink(val_path) - - -def test_dpo_dataset_initialization(mock_dpo_data): - """Test that DPODataset initializes correctly with valid data files.""" - train_path, val_path = mock_dpo_data - - dataset = DPODataset(train_data_path=train_path, val_data_path=val_path) - - # Verify dataset initialization - assert dataset.task_spec.task_name == "DPO" - - # Verify formatted_ds structure - assert "train" in dataset.formatted_ds - assert "validation" in dataset.formatted_ds - - assert len(dataset.formatted_ds["train"]) == 2 - assert len(dataset.formatted_ds["validation"]) == 1 - - -def test_dpo_dataset_invalid_files(): - """Test that DPODataset raises appropriate errors with invalid files.""" - with pytest.raises(FileNotFoundError): - DPODataset(train_data_path="nonexistent.json", val_data_path="nonexistent.json") - - -def test_dpo_dataset_data_format(mock_dpo_data): - """Test that DPODataset correctly formats the data.""" - train_path, val_path = mock_dpo_data - dataset = DPODataset(train_data_path=train_path, val_data_path=val_path) - - # Verify data format - train_sample = dataset.formatted_ds["train"][0] - assert "prompt" in train_sample - assert "chosen_response" in train_sample - assert "rejected_response" in train_sample - - # Verify data content - assert train_sample["prompt"] == "What is 2+2?" - assert train_sample["chosen_response"] == "The answer is 4." - assert train_sample["rejected_response"] == "I don't know." diff --git a/tests/unit/data/hf_datasets/test_helpsteer.py b/tests/unit/data/hf_datasets/test_helpsteer.py deleted file mode 100644 index 036ba75669..0000000000 --- a/tests/unit/data/hf_datasets/test_helpsteer.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import pytest - -from nemo_rl.data.hf_datasets.helpsteer3 import ( - HelpSteer3Dataset, - format_helpsteer3, -) - - -@pytest.fixture(scope="module") -def helpsteer3_dataset(): - try: - dataset = HelpSteer3Dataset() - yield dataset - except Exception as e: - print(f"Error during loading HelpSteer3Dataset: {e}") - yield - - -def test_format_helpsteer3(): - """Test the format_helpsteer3 function with different preference values.""" - # Test case 1: response1 is preferred (overall_preference < 0) - data1 = { - "context": "What is 2+2?", - "response1": "The answer is 4.", - "response2": "I don't know.", - "overall_preference": -1, - } - result1 = format_helpsteer3(data1) - assert result1["prompt"] == "What is 2+2?" - assert result1["chosen_response"] == "The answer is 4." - assert result1["rejected_response"] == "I don't know." - - # Test case 2: response2 is preferred (overall_preference > 0) - data2 = { - "context": "What is the capital of France?", - "response1": "The capital of France is London.", - "response2": "The capital of France is Paris.", - "overall_preference": 1, - } - result2 = format_helpsteer3(data2) - assert result2["prompt"] == "What is the capital of France?" - assert result2["chosen_response"] == "The capital of France is Paris." - assert result2["rejected_response"] == "The capital of France is London." 
- - # Test case 3: no preference (overall_preference = 0) - data3 = { - "context": "What is the weather like?", - "response1": "It's sunny today.", - "response2": "The weather is sunny.", - "overall_preference": 0, - } - result3 = format_helpsteer3(data3) - assert result3["prompt"] == "What is the weather like?" - # When preference is 0, neither response is preferred, so - # response 1 is used for both chosen and rejected - assert result3["chosen_response"] == "It's sunny today." - assert result3["rejected_response"] == "It's sunny today." - - -def test_helpsteer3_dataset_initialization(helpsteer3_dataset): - """Test that HelpSteer3Dataset initializes correctly.""" - - dataset = helpsteer3_dataset - if dataset is None: - pytest.skip("dataset download is flaky") - - # Verify dataset initialization - assert dataset.task_spec.task_name == "HelpSteer3" - - -def test_helpsteer3_dataset_data_format(helpsteer3_dataset): - """Test that HelpSteer3Dataset correctly formats the data.""" - - dataset = helpsteer3_dataset - if dataset is None: - pytest.skip("dataset download is flaky") - - assert isinstance(dataset.formatted_ds, dict) - assert "train" in dataset.formatted_ds - assert "validation" in dataset.formatted_ds - - # Verify data format - sample = dataset.formatted_ds["train"][0] - assert "prompt" in sample - assert "chosen_response" in sample - assert "rejected_response" in sample diff --git a/tests/unit/data/hf_datasets/test_squad.py b/tests/unit/data/hf_datasets/test_squad.py deleted file mode 100644 index f5e01b250a..0000000000 --- a/tests/unit/data/hf_datasets/test_squad.py +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import pytest -from transformers import AutoTokenizer - -from nemo_rl.data.hf_datasets.squad import SquadDataset - - -@pytest.mark.hf_gated -@pytest.mark.skip(reason="dataset download is flaky") -def test_squad_dataset(): - tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") - squad_dataset = SquadDataset() - - # check that the dataset is formatted correctly - for example in squad_dataset.formatted_ds["train"].take(5): - assert "messages" in example - assert len(example["messages"]) == 3 - - assert example["messages"][0]["role"] == "system" - assert example["messages"][1]["role"] == "user" - assert example["messages"][2]["role"] == "assistant" - - template = "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer:'}}{%- elif message['role'] == 'assistant' %}{{' ' + message['content'].strip()}}{%- endif %}{% endfor %}" - - ## check that applying chat template works as expected - default_templated = tokenizer.apply_chat_template( - example["messages"], - chat_template=template, - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - - assert default_templated == ( - "Context: " - + example["messages"][0]["content"] - + " Question: " - + example["messages"][1]["content"] - + " Answer: " - + example["messages"][2]["content"] - ) diff --git a/tests/unit/data/test_datasets.py b/tests/unit/data/test_collate_fn.py similarity index 93% rename from 
tests/unit/data/test_datasets.py rename to tests/unit/data/test_collate_fn.py index d879b09a85..e531f4cb5e 100755 --- a/tests/unit/data/test_datasets.py +++ b/tests/unit/data/test_collate_fn.py @@ -16,13 +16,13 @@ import torch -from nemo_rl.data.datasets import dpo_collate_fn +from nemo_rl.data.collate_fn import preference_collate_fn from nemo_rl.data.interfaces import DatumSpec from nemo_rl.distributed.batched_data_dict import BatchedDataDict -def test_dpo_collate_fn(): - """Test that dpo_collate_fn correctly processes DPO training data.""" +def test_preference_collate_fn(): + """Test that preference_collate_fn correctly processes preference data.""" # Create mock tokenizer mock_tokenizer = MagicMock() mock_tokenizer.pad_token_id = 0 @@ -93,9 +93,12 @@ def test_dpo_collate_fn(): ), ] - # Call dpo_collate_fn - train_data = dpo_collate_fn( - data_batch, mock_tokenizer, make_sequence_length_divisible_by=16 + # Call preference_collate_fn + train_data = preference_collate_fn( + data_batch, + mock_tokenizer, + make_sequence_length_divisible_by=16, + add_loss_mask=True, ) # Verify the output structure diff --git a/tests/unit/data/test_data_processor.py b/tests/unit/data/test_data_processor.py index dc88bebee3..8da06bfa2e 100644 --- a/tests/unit/data/test_data_processor.py +++ b/tests/unit/data/test_data_processor.py @@ -14,7 +14,11 @@ import os import sys +import tempfile +from collections import defaultdict +import pytest +import torch from datasets import Dataset abspath = os.path.abspath(__file__) @@ -22,14 +26,43 @@ from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.data.datasets import AllTaskProcessedDataset -from nemo_rl.data.interfaces import TaskDataSpec -from nemo_rl.data.processors import math_data_processor +from nemo_rl.data.datasets.eval_datasets import ( + AIMEDataset, + GPQADataset, + MathDataset, + MMLUDataset, +) +from nemo_rl.data.datasets.response_datasets import ( + DeepScalerDataset, + OpenMathInstruct2Dataset, +) +from 
nemo_rl.data.interfaces import TaskDataProcessFnCallable, TaskDataSpec +from nemo_rl.data.processors import math_data_processor, math_hf_data_processor from nemo_rl.models.policy import TokenizerConfig -basic_tokenizer_test_config: TokenizerConfig = { - "name": "Qwen/Qwen2.5-Math-1.5B-Instruct", - "chat_template": "default", -} + +class DummyTokenizer: + def apply_chat_template( + self, + messages, + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + ): + content = "".join( + f"{m.get('role', 'user')}: {m['content']}\n" for m in messages + ) + if add_generation_prompt: + content += "assistant:" + return content + + def __call__(self, text, return_tensors=None, add_special_tokens=False): + if isinstance(text, list): + text = "".join(text) + encoded = list(range(len(text))) + if return_tensors == "pt": + return {"input_ids": torch.tensor([encoded], dtype=torch.long)} + return {"input_ids": encoded} def test_math_data_processor(): @@ -40,7 +73,12 @@ def test_math_data_processor(): ] ) - tokenizer = get_tokenizer(basic_tokenizer_test_config) + tokenizer = get_tokenizer( + TokenizerConfig( + name="Qwen/Qwen2.5-Math-1.5B-Instruct", + chat_template="default", + ) + ) math_task_spec = TaskDataSpec( task_name="math", @@ -58,3 +96,158 @@ def test_math_data_processor(): assert dataset[0]["extra_env_info"]["ground_truth"] == "answer1" assert dataset[1]["extra_env_info"]["ground_truth"] == "answer2" + + +@pytest.mark.hf_gated +@pytest.mark.parametrize( + "tokenizer_name", + [ + "meta-llama/Llama-3.2-1B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", # no bos token + "google/gemma-3-1b-it", + "Qwen/Qwen3-0.6B", # no bos token + "deepseek-ai/DeepSeek-V3", + "moonshotai/Moonlight-16B-A3B-Instruct", + ], +) +@pytest.mark.parametrize( + "dataset_cls", + [ + OpenMathInstruct2Dataset, + DeepScalerDataset, + ], +) +def test_math_hf_data_processor(tokenizer_name, dataset_cls): + # Initialize dataset + data = dataset_cls() + + # Setup tokenizer + tokenizer = 
get_tokenizer( + TokenizerConfig( + name=tokenizer_name, + chat_template="default", + ) + ) + + # Configure task specification + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=f"{os.path.dirname(abspath)}/../../../examples/prompts/cot.txt", + system_prompt_file=None, + ) + + task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( + defaultdict(lambda: (math_task_spec, math_hf_data_processor)) + ) + task_data_processors["math"] = (math_task_spec, math_hf_data_processor) + + dataset = AllTaskProcessedDataset( + dataset=data.formatted_ds["train"], + tokenizer=tokenizer, + default_task_data_spec=math_task_spec, + task_data_processors=task_data_processors, + max_seq_length=128, + ) + + # Test that the first item can be retrieved when the BOS token assertion passes + first_item = dataset[0] + assert first_item is not None + assert "message_log" in first_item + assert len(first_item["message_log"]) > 0 + + +def test_math_hf_data_processor_without_prompt(): + datum_dict = { + "messages": [ + {"role": "user", "content": "Solve 1+1."}, + {"role": "assistant", "content": "2"}, + ], + "task_name": "math", + } + tokenizer = DummyTokenizer() + + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=None, + system_prompt_file=None, + ) + + result = math_hf_data_processor( + datum_dict=datum_dict, + task_data_spec=math_task_spec, + tokenizer=tokenizer, + max_seq_length=128, + idx=0, + ) + + assert result["extra_env_info"]["ground_truth"] == "2" + assert result["loss_multiplier"] == 1.0 + assert len(result["message_log"]) == 1 + assert result["message_log"][0]["role"] == "user" + assert "Solve 1+1." 
in result["message_log"][0]["content"] + + +@pytest.fixture +def system_prompt_file(request): + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as file: + file.write("You are a helpful assistant.\n{}") + + return file.name + + +@pytest.mark.hf_gated +@pytest.mark.parametrize( + "tokenizer_name", + [ + "meta-llama/Llama-3.2-1B-Instruct", + "Qwen/Qwen2.5-1.5B-Instruct", # no bos token + "google/gemma-3-1b-it", + "Qwen/Qwen3-0.6B", # no bos token + "deepseek-ai/DeepSeek-V3", + "moonshotai/Moonlight-16B-A3B-Instruct", + ], +) +@pytest.mark.parametrize( + "dataset_cls", + [ + AIMEDataset, + GPQADataset, + MathDataset, + MMLUDataset, + ], +) +@pytest.mark.parametrize( + "system_prompt_file", [system_prompt_file, None], indirect=True +) +def test_eval_math_hf_data_processor(tokenizer_name, dataset_cls, system_prompt_file): + # Initialize dataset + data = dataset_cls() + + # Setup tokenizer + tokenizer = get_tokenizer( + TokenizerConfig( + name=tokenizer_name, + chat_template="default", + ) + ) + + # Configure task specification + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=f"{os.path.dirname(abspath)}/../../../examples/prompts/cot.txt", + system_prompt_file=system_prompt_file, + ) + + dataset = AllTaskProcessedDataset( + dataset=data.rekeyed_ds, + tokenizer=tokenizer, + default_task_data_spec=math_task_spec, + task_data_processors=data.processor, + max_seq_length=128, + ) + + # Test that the first item can be retrieved when the BOS token assertion passes + first_item = dataset[0] + assert first_item is not None + assert "message_log" in first_item + assert len(first_item["message_log"]) > 0 diff --git a/tests/unit/data/test_data_shuffle_reproducity.py b/tests/unit/data/test_data_shuffle_reproducity.py new file mode 100644 index 0000000000..155a5b11e7 --- /dev/null +++ b/tests/unit/data/test_data_shuffle_reproducity.py @@ -0,0 +1,152 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile +from collections import defaultdict + +import pytest +import torch +from torchdata.stateful_dataloader import StatefulDataLoader + +from nemo_rl.algorithms.utils import get_tokenizer, set_seed +from nemo_rl.data.collate_fn import rl_collate_fn +from nemo_rl.data.datasets import AllTaskProcessedDataset +from nemo_rl.data.datasets.response_datasets import OpenMathInstruct2Dataset +from nemo_rl.data.interfaces import TaskDataProcessFnCallable, TaskDataSpec +from nemo_rl.data.processors import math_hf_data_processor +from nemo_rl.models.policy import TokenizerConfig + +# Test configuration +TOKENIZER_CONFIG: TokenizerConfig = { + "name": "Qwen/Qwen2.5-Math-1.5B-Instruct", + "chat_template": "default", +} + +MAX_BATCHES_TO_TEST = 10 + + +def create_dataloader( + seed: int = 42, max_seq_length: int = 128, batch_size: int = 4 +) -> StatefulDataLoader: + """Create a dataloader with consistent configuration for testing.""" + # Initialize dataset + data = OpenMathInstruct2Dataset(seed=seed) + + # Setup tokenizer + tokenizer = get_tokenizer(TOKENIZER_CONFIG) + + # Configure task specification + math_task_spec = TaskDataSpec( + task_name="math", + prompt_file=f"{os.path.dirname(os.path.abspath(__file__))}/../../../examples/prompts/cot.txt", + system_prompt_file=None, + ) + + task_data_processors: dict[str, tuple[TaskDataSpec, TaskDataProcessFnCallable]] = ( + defaultdict(lambda: (math_task_spec, 
math_hf_data_processor)) + ) + task_data_processors["math"] = (math_task_spec, math_hf_data_processor) + + dataset = AllTaskProcessedDataset( + dataset=data.formatted_ds["train"].select(range(1000)), + tokenizer=tokenizer, + default_task_data_spec=math_task_spec, + task_data_processors=task_data_processors, + max_seq_length=max_seq_length, + ) + + return StatefulDataLoader( + dataset, + batch_size=batch_size, + shuffle=True, + collate_fn=rl_collate_fn, + drop_last=True, + ) + + +@pytest.mark.parametrize("seed", [42, 24]) +def test_data_shuffle_reproducity_from_start(seed): + """Test that dataloader shuffling is reproducible with the same seed.""" + # Step 1: Set seed and create initial dataloader + set_seed(seed) + original_dataloader = create_dataloader(seed=seed) + + expected_batches = [] + for batch in original_dataloader: + expected_batches.append(batch) + if len(expected_batches) >= MAX_BATCHES_TO_TEST: + break + + # Step 2: to mimic a new experiment: + # set original seed and create new dataloader under the same seed environment + set_seed(seed) + new_dataloader = create_dataloader(seed=seed) + + for i, (expected_batch, actual_batch) in enumerate( + zip(expected_batches, new_dataloader) + ): + assert str(expected_batch) == str(actual_batch), f"Batch {i} is different" + + +@pytest.mark.parametrize("save_state_at_batch", [6, 10]) +def test_data_shuffle_reproducity_from_continue(save_state_at_batch, seed=42): + """Test that dataloader state can be saved and restored for continuation.""" + # Step 1: Set seed and create initial dataloader + set_seed(seed) + original_dataloader = create_dataloader(seed=seed) + + with tempfile.TemporaryDirectory() as temp_dir: + expected_batches = [] + for i, batch in enumerate(original_dataloader): + if ( + i >= save_state_at_batch - 1 + ): # Stop after consuming exactly save_state_at_batch batches + if i == save_state_at_batch - 1: + # Step 2: Save the state at this point + state_file = os.path.join(temp_dir, 
"dataloader_state.pt") + torch.save(original_dataloader.state_dict(), state_file) + else: + # Step 3: Get the expected continuation from original dataloader + expected_batches.append(batch) + if len(expected_batches) >= MAX_BATCHES_TO_TEST: + break + + # step 4: to mimic a continued experiment: + # set original seed and create new dataloader under the same seed environment + # load the saved state and continue from the saved point + set_seed(seed) + continued_dataloader = create_dataloader(seed=seed) + + state_dict = torch.load(state_file) + continued_dataloader.load_state_dict(state_dict) + + # Step 5: Get batches from the continued dataloader + actual_batches = [] + for batch in continued_dataloader: + if len(actual_batches) >= MAX_BATCHES_TO_TEST: + break + actual_batches.append(batch) + + assert len(actual_batches) == len(expected_batches) + + # Step 6: Compare the batches - they should be identical + for i, (actual_batch, expected_batch) in enumerate( + zip(actual_batches, expected_batches) + ): + assert str(actual_batch) == str(expected_batch), ( + f"Batch {i} from continued dataloader doesn't match expected batch\n" + f"actual_batch['idx']:\t{actual_batch['idx']}\n" + f"expected_batch['idx']:\t{expected_batch['idx']}" + ) diff --git a/tests/unit/data/test_llm_message_utils.py b/tests/unit/data/test_llm_message_utils.py index 91ae2e41b7..39b8fab49d 100644 --- a/tests/unit/data/test_llm_message_utils.py +++ b/tests/unit/data/test_llm_message_utils.py @@ -13,11 +13,15 @@ # limitations under the License. 
+from typing import Any, Callable + import pytest import torch -from transformers import AutoTokenizer +from PIL import Image +from transformers import AutoProcessor, AutoTokenizer -from nemo_rl.data.hf_datasets import COMMON_CHAT_TEMPLATES +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.data.chat_templates import COMMON_CHAT_TEMPLATES from nemo_rl.data.interfaces import LLMMessageLogType, TaskDataSpec from nemo_rl.data.llm_message_utils import ( _validate_tensor_consistency, @@ -88,6 +92,52 @@ def raw_chat_message_log() -> list[LLMMessageLogType]: ] +def qwen3_message_log( + model_name: str, enable_thinking: bool +) -> tuple[list[LLMMessageLogType], list[str]]: + """Helper function for Qwen3 message logs.""" + # input data for test + input_data = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"}, + { + "role": "assistant", + "content": "<think>\noh wait.\n</think>\n\nHi there!" + if enable_thinking + else "Hi there!", + }, + ] + + # use a tokenizer directly from HuggingFace to prepare expected result + tokenizer = AutoTokenizer.from_pretrained(model_name) + assert tokenizer.bos_token is None + + # get expected result + ## result is equivalent to if we apply chat template to the full message log, + expected_text_string = tokenizer.apply_chat_template( + [input_data[:2]], + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + enable_thinking=enable_thinking, + )[0] + + delimiter = "<|im_end|>\n" + split_text = expected_text_string.split(delimiter, 1) + expected_text = [] + for i in range(len(split_text)): + if i == len(split_text) - 1: + expected_text.append(split_text[i]) + else: + expected_text.append(split_text[i] + delimiter) + + ## separately handle the last message because of the generation prompt + formatted_assistant_message = input_data[2]["content"] + delimiter + expected_text.append(formatted_assistant_message) + + return input_data, expected_text + + 
@pytest.fixture def tokenized_non_chat_message_log() -> list[LLMMessageLogType]: return [ @@ -328,170 +378,149 @@ def test_batch_pad_message_log_custom_pad_value( ) -@pytest.mark.hf_gated -def test_get_formatted_message_log_llama( - raw_chat_message_log: LLMMessageLogType, -) -> None: - tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") - - ## get expected result - formatted_system_message = tokenizer.apply_chat_template( - [raw_chat_message_log[0]], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - formatted_user_message = tokenizer.apply_chat_template( - [raw_chat_message_log[1]], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - formatted_assistant_message = tokenizer.apply_chat_template( - [raw_chat_message_log[2]], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - - ## text should be equivalent to if we apply chat template - ## to each turn separately and manually remove the bot string - ## from the intermediate turns - bot_str = "<|begin_of_text|>" - expected_text = [ - formatted_system_message, - formatted_user_message[len(bot_str) :], - formatted_assistant_message[len(bot_str) :], - ] - - task_data_spec = TaskDataSpec( - task_name="test", - ) - result = get_formatted_message_log(raw_chat_message_log, tokenizer, task_data_spec) - actual_text = [m["content"] for m in result] - - assert actual_text == expected_text - - -@pytest.mark.hf_gated -def test_get_formatted_message_log_add_generation_prompt_llama( +@pytest.mark.parametrize( + "model_id, chat_log_transform", + [ + pytest.param( + "meta-llama/Meta-Llama-3-8B-Instruct", + lambda raw: raw, + marks=pytest.mark.hf_gated, + id="llama", + ), + pytest.param( + "google/gemma-3-27b-it", + # Some Gemma chat templates (or versions) raise on system turns. + # For portability across environments, test on user+assistant only. 
+ # If your tokenizer supports system turns, you can change this to `lambda raw: raw`. + lambda raw: [raw[1], raw[2]], + marks=pytest.mark.hf_gated, + id="gemma", + ), + pytest.param( + "Qwen/Qwen2.5-Coder-32B-Instruct", + lambda raw: raw, + id="qwen", + ), + ], +) +@pytest.mark.parametrize("add_generation_prompt", [False, True]) +def test_get_formatted_message_log_models( raw_chat_message_log: LLMMessageLogType, + model_id: str, + chat_log_transform: Callable[[Any], Any], + add_generation_prompt: bool, ) -> None: - tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B-Instruct") - - ## get expected result - formatted_system_message = tokenizer.apply_chat_template( - [raw_chat_message_log[0]], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - ) - formatted_user_message = tokenizer.apply_chat_template( - [raw_chat_message_log[1]], - tokenize=False, - add_generation_prompt=True, - add_special_tokens=False, - ) - formatted_assistant_message = ( - raw_chat_message_log[2]["content"] + tokenizer.eos_token - ) - - ## text should be equivalent to if we apply chat template - ## to each turn separately and manually remove the bot string - ## from the intermediate turns - bot_str = "<|begin_of_text|>" - expected_text = [ - formatted_system_message, - formatted_user_message[len(bot_str) :], - formatted_assistant_message, - ] - - task_data_spec = TaskDataSpec( - task_name="test", - ) + """Validate that get_formatted_message_log produces text consistent with the + tokenizer's chat template across models. + + This test is parametrized over model/tokenizer and whether to include a + generation prompt. For models like Gemma that error on system turns, the + input chat log is transformed to exclude the system message. + + Expectations: + - Require an EOS token for well-defined end-of-turn comparison. 
+ - When add_generation_prompt is False, the concatenated contents must match + the tokenizer's apply_chat_template output; if the tokenizer omits a final + EOS, accept the actual with EOS by appending EOS to the expected before + comparison. + - When add_generation_prompt is True and the last turn is an assistant + message, accept either: + (1) prefix built with add_generation_prompt=True followed by the raw + assistant content plus EOS; or + (2) the tokenizer's full non-generation template output plus EOS. + This avoids hard-coding model-specific headers or delimiters while still + verifying semantic equivalence. + - Only normalization performed is trimming a trailing newline after EOS. + """ + tokenizer = AutoTokenizer.from_pretrained(model_id) + chat_log = chat_log_transform(raw_chat_message_log) + # Ensure tokenizer defines an EOS token; otherwise the test logic is ill-defined + assert tokenizer.eos_token, "Tokenizer must define eos_token for this test" + eos = tokenizer.eos_token + task_data_spec = TaskDataSpec(task_name="test") result = get_formatted_message_log( - raw_chat_message_log, + chat_log, tokenizer, task_data_spec, - add_generation_prompt=True, + add_generation_prompt=add_generation_prompt, ) - actual_text = [m["content"] for m in result] - - assert actual_text == expected_text - - -def test_get_formatted_message_log_qwen( - raw_chat_message_log: LLMMessageLogType, -) -> None: - ## test using a tokenizer that does not have a bos token - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct") - assert tokenizer.bos_token is None - - ## get expected result - ## result is equivalent to if we apply chat template to the full message log, - ## remove the trailing newline, and then partition by the delimiter - expected_text_string = tokenizer.apply_chat_template( - [raw_chat_message_log], - tokenize=False, - add_generation_prompt=False, - add_special_tokens=False, - )[0].rstrip("\n") ## remove trailing newline - - delimiter = 
"<|im_end|>\n" - split_text = expected_text_string.split(delimiter) - expected_text = [] - for i in range(len(split_text)): - if i == len(raw_chat_message_log) - 1: - expected_text.append(split_text[i]) + actual_concat = "".join(m["content"] for m in result) + + def normalize(s: str) -> str: + # Normalize EOS+newline quirk to EOS only + if s.endswith(eos + "\n"): + return s[:-1] + return s + + if not add_generation_prompt: + expected_concat = tokenizer.apply_chat_template( + [chat_log], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + )[0] + # Accept EOS presence even if the tokenizer's template omits it + if actual_concat.endswith(eos) and not expected_concat.endswith(eos): + expected_concat = expected_concat + eos + assert normalize(actual_concat) == normalize(expected_concat) + else: + if len(chat_log) > 0 and chat_log[-1].get("role") == "assistant": + prefix_log = chat_log[:-1] + # Some tokenizers include a role header when add_generation_prompt=True. + # Accept either behavior without hard-coding model-specific strings. 
+ prefix_gen = tokenizer.apply_chat_template( + [prefix_log], + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + )[0] + assistant_suffix = chat_log[-1]["content"] + eos + expected_concat_a = prefix_gen + assistant_suffix + # Alternative: take the full non-generation template output and just append EOS + full_no_gen = tokenizer.apply_chat_template( + [chat_log], + tokenize=False, + add_generation_prompt=False, + add_special_tokens=False, + )[0] + expected_concat_b = full_no_gen + eos + actual_norm = normalize(actual_concat) + assert actual_norm == normalize( + expected_concat_a + ) or actual_norm == normalize(expected_concat_b) else: - expected_text.append(split_text[i] + delimiter) - - task_data_spec = TaskDataSpec( - task_name="test", - ) - result = get_formatted_message_log(raw_chat_message_log, tokenizer, task_data_spec) - actual_text = [m["content"] for m in result] - - assert actual_text == expected_text - - -def test_get_formatted_message_log_add_generation_prompt_qwen( - raw_chat_message_log: LLMMessageLogType, + expected_concat = tokenizer.apply_chat_template( + [chat_log], + tokenize=False, + add_generation_prompt=True, + add_special_tokens=False, + )[0] + assert normalize(actual_concat) == normalize(expected_concat) + + +@pytest.mark.parametrize("enable_thinking", [True, False]) +def test_get_formatted_message_log_qwen3_enable_thinking( + enable_thinking, ) -> None: - ## test using a tokenizer that does not have a bos token - tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-32B-Instruct") - assert tokenizer.bos_token is None - - ## get expected result - ## result is equivalent to if we apply chat template to the full message log, - ## remove the trailing newline, and then partition by the delimiter - ## Separately handle the last message because of the generation prompt - expected_text_string = tokenizer.apply_chat_template( - [raw_chat_message_log[:2]], - tokenize=False, - add_generation_prompt=True, - 
add_special_tokens=False, - )[0] + model_name = "Qwen/Qwen3-8B" - delimiter = "<|im_end|>\n" - split_text = expected_text_string.split(delimiter, 1) - expected_text = [] - for i in range(len(split_text)): - if i == len(split_text) - 1: - expected_text.append(split_text[i]) - else: - expected_text.append(split_text[i] + delimiter) + # setup test data + input_data, expected_text = qwen3_message_log(model_name, enable_thinking) - formatted_assistant_message = ( - raw_chat_message_log[2]["content"] + tokenizer.eos_token - ) - expected_text.append(formatted_assistant_message) + # setup tokenizer + tokenizer_config = { + "name": model_name, + "chat_template": "default", + "chat_template_kwargs": {"enable_thinking": enable_thinking}, + } + tokenizer = get_tokenizer(tokenizer_config) + # get actual result task_data_spec = TaskDataSpec( task_name="test", ) result = get_formatted_message_log( - raw_chat_message_log, + input_data, tokenizer, task_data_spec, add_generation_prompt=True, @@ -591,3 +620,175 @@ def test_get_first_index_that_differs(): assert get_first_index_that_differs("hello world", "hello") == 5 assert get_first_index_that_differs("hi1", "hello2") == 1 assert get_first_index_that_differs("hello2", "hi1") == 1 + + +def test_message_log_to_flat_messages_with_packed_images() -> None: + from nemo_rl.data.multimodal_utils import PackedTensor + + # two turns, each with an image tensor wrapped in PackedTensor + img1 = torch.randn(2, 3, 8, 8) + img2 = torch.randn(3, 3, 8, 8) + message_log: LLMMessageLogType = [ + { + "role": "user", + "content": "see image", + "token_ids": torch.tensor([1, 2]), + "images": PackedTensor(img1, dim_to_pack=0), + }, + { + "role": "assistant", + "content": "ok", + "token_ids": torch.tensor([3]), + "images": PackedTensor(img2, dim_to_pack=0), + }, + ] + flat = message_log_to_flat_messages(message_log) + assert isinstance(flat["images"], PackedTensor) + assert tuple(flat["images"].as_tensor().shape) == (5, 3, 8, 8) + assert 
torch.equal(flat["token_ids"], torch.tensor([1, 2, 3])) + + +def test_batched_message_log_to_flat_message_with_packed_images() -> None: + from nemo_rl.data.multimodal_utils import PackedTensor + + img_a = torch.randn(1, 3, 4, 4) + img_b = torch.randn(2, 3, 4, 4) + img_c = torch.randn(1, 3, 4, 4) + + batch_logs = [ + [ + { + "role": "user", + "content": "prompt a", + "token_ids": torch.tensor([1, 2, 3]), + "images": PackedTensor(img_a, dim_to_pack=0), + }, + {"role": "assistant", "content": "resp", "token_ids": torch.tensor([4])}, + ], + [ + { + "role": "user", + "content": "prompt b", + "token_ids": torch.tensor([5, 6]), + "images": PackedTensor(img_b, dim_to_pack=0), + }, + { + "role": "assistant", + "content": "resp2", + "token_ids": torch.tensor([7, 8]), + }, + { + "role": "user", + "content": "again", + "token_ids": torch.tensor([9]), + "images": PackedTensor(img_c, dim_to_pack=0), + }, + ], + ] + + batched, input_lengths = batched_message_log_to_flat_message( + batch_logs, pad_value_dict={"token_ids": 0} + ) + assert isinstance(batched["images"], PackedTensor) + # flattened_concat keeps two packed tensors (one per convo) + assert len(batched["images"]) == 2 + # total packed along dim 0 = 1 + (2 + 1) = 4 + assert tuple(batched["images"].as_tensor().shape) == (4, 3, 4, 4) + assert torch.equal(input_lengths, torch.tensor([4, 5], dtype=torch.int32)) + + +@pytest.mark.hf_gated +def test_get_formatted_message_log_multimodal_prompt_formatting() -> None: + processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-3B-Instruct") + task_data_spec = TaskDataSpec(task_name="t") + task_data_spec.prompt = "Question: {} Answer:" + + # one user turn with text+image, then assistant + image = Image.new("RGB", (16, 16), color=(0, 0, 0)) + message_log: LLMMessageLogType = [ + { + "role": "system", + "content": "", # to prevent Qwen's default system prompt taking over + }, + { + "role": "user", + "content": [ + {"type": "text", "text": "a cat?"}, + {"type": "image", "image": 
image}, + ], + }, + {"role": "assistant", "content": "okay"}, + ] + + out = get_formatted_message_log( + message_log, processor, task_data_spec, add_bos_token=False, add_eos_token=False + ) + # First message text should be formatted by prompt + assert isinstance(out[1]["content"], list) + assert any( + item["type"] == "text" + and item["text"].startswith("<|im_start|>user\nQuestion: ") + for item in out[1]["content"] + ) # type: ignore[index] + # pixel_values should be added as PackedTensor for the first message + from nemo_rl.data.multimodal_utils import PackedTensor + + assert isinstance(out[1]["pixel_values"], PackedTensor) + assert isinstance(out[1]["image_grid_thw"], PackedTensor) + pv = out[1]["pixel_values"].as_tensor() + grid_thw = out[1]["image_grid_thw"].as_tensor() + assert pv.ndim == 2 and pv.shape[1] == 1176 + assert grid_thw.ndim == 2 and grid_thw.shape == torch.Size([1, 3]) + # token_ids should be non-empty tensors + assert ( + isinstance(out[1]["token_ids"], torch.Tensor) + and out[1]["token_ids"].numel() > 0 + ) + assert ( + isinstance(out[2]["token_ids"], torch.Tensor) + and out[2]["token_ids"].numel() > 0 + ) + + #### Case 2 : without system prompt + image = Image.new("RGB", (16, 16), color=(0, 0, 0)) + message_log: LLMMessageLogType = [ + { + "role": "user", + "content": [ + {"type": "text", "text": "a cat?"}, + {"type": "image", "image": image}, + ], + }, + {"role": "assistant", "content": "okay"}, + ] + + out = get_formatted_message_log( + message_log, processor, task_data_spec, add_bos_token=False, add_eos_token=False + ) + # First message text should be formatted by prompt + assert isinstance(out[0]["content"], list) + assert any( + item["type"] == "text" + and item["text"].startswith( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nQuestion: " + ) + for item in out[0]["content"] + ) # type: ignore[index] + # pixel_values should be added as PackedTensor for the first message + from 
nemo_rl.data.multimodal_utils import PackedTensor + + assert isinstance(out[0]["pixel_values"], PackedTensor) + assert isinstance(out[0]["image_grid_thw"], PackedTensor) + pv = out[0]["pixel_values"].as_tensor() + grid_thw = out[0]["image_grid_thw"].as_tensor() + assert pv.ndim == 2 and pv.shape[1] == 1176 + assert grid_thw.ndim == 2 and grid_thw.shape == torch.Size([1, 3]) + # token_ids should be non-empty tensors + assert ( + isinstance(out[0]["token_ids"], torch.Tensor) + and out[0]["token_ids"].numel() > 0 + ) + assert ( + isinstance(out[1]["token_ids"], torch.Tensor) + and out[1]["token_ids"].numel() > 0 + ) diff --git a/tests/unit/data/test_multimodal_dict.py b/tests/unit/data/test_multimodal_dict.py new file mode 100644 index 0000000000..a94412222a --- /dev/null +++ b/tests/unit/data/test_multimodal_dict.py @@ -0,0 +1,351 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import pytest +import torch + +from nemo_rl.data.llm_message_utils import batched_message_log_to_flat_message +from nemo_rl.data.multimodal_utils import ( + PackedTensor, +) +from nemo_rl.distributed.batched_data_dict import ( + BatchedDataDict, + DynamicBatchingArgs, + SequencePackingArgs, +) + + +def test_packed_data_basic(): + """Test basic functionality of PackedTensor.""" + # Create sample packed items + tensor1 = torch.randn(16, 3) + tensor2 = torch.randn(45, 3) + + item1 = PackedTensor(tensor1, dim_to_pack=0) + item2 = PackedTensor(tensor2, dim_to_pack=0) + + # Test item functionality + assert torch.equal(item1.as_tensor(), tensor1) + assert item1.dim_to_pack == 0 + + # Test batch creation and concatenation + batch = PackedTensor([item1.as_tensor(), item2.as_tensor()], dim_to_pack=0) + assert len(batch) == 2 + + # Test as_tensor + expected_tensor = torch.cat([tensor1, tensor2], dim=0) + assert torch.equal(batch.as_tensor(), expected_tensor) + + +def test_shard_by_batch_size_with_packed_data(): + """Test shard_by_batch_size with packed multimodal data.""" + # Create sample data + text_tensor = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]) + image_tensors = [torch.randn(3 * i + 2, 3, 128, 128) for i in range(4)] + + # Create packed image data + packed_batch = PackedTensor(image_tensors, dim_to_pack=0) + + # Create BatchedDataDict + batch = BatchedDataDict( + { + "text_ids": text_tensor, + "image_features": packed_batch, + "labels": [1, 2, 3, 4], + } + ) + + # Test sharding + shards = batch.shard_by_batch_size(shards=2) + assert len(shards) == 2 + + # Verify first shard + assert torch.equal(shards[0]["text_ids"], torch.tensor([[1, 2, 3], [4, 5, 6]])) + assert isinstance(shards[0]["image_features"], PackedTensor) + assert len(shards[0]["image_features"]) == 2 + assert shards[0]["image_features"].as_tensor().shape == (2 + 5, 3, 128, 128) + assert shards[0]["labels"] == [1, 2] + + # Verify second shard + assert torch.equal(shards[1]["text_ids"], 
torch.tensor([[7, 8, 9], [10, 11, 12]])) + assert isinstance(shards[1]["image_features"], PackedTensor) + assert len(shards[1]["image_features"]) == 2 + assert shards[1]["image_features"].as_tensor().shape == (8 + 11, 3, 128, 128) + assert shards[1]["labels"] == [3, 4] + + +def test_truncate_tensors_with_packed_data(): + """Test truncate_tensors with packed multimodal data.""" + # Create sample data + text_tensor = torch.tensor([[1, 2, 3, 4], [5, 6, 7, 8]]) + image_tensors = [ + torch.randn(5, 3, 128, 4, 2, 2) for i in range(2) + ] # also check a different dim_to_pack + + # Create packed image data + packed_batch = PackedTensor(image_tensors, dim_to_pack=1) + + # Create BatchedDataDict + batch = BatchedDataDict({"text_ids": text_tensor, "image_features": packed_batch}) + + # Test truncation + batch.truncate_tensors(dim=1, truncated_len=2) + + # Verify text was truncated + assert torch.equal(batch["text_ids"], torch.tensor([[1, 2], [5, 6]])) + # Verify image features were not affected (assumed safe as per comment in truncate_tensors) + assert isinstance(batch["image_features"], PackedTensor) + assert batch["image_features"].as_tensor().shape == (5, 6, 128, 4, 2, 2) + + +def test_multiturn_rollout_with_packed_data(): + """Test multiturn conversations with packed multimodal data.""" + message_log_1 = [ + { + "role": "user", + "token_ids": torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]), + "images": PackedTensor(torch.randn(3, 128, 128), dim_to_pack=0), + }, + { + "role": "assistant", + "token_ids": torch.tensor([9, 10, 11, 12, 13, 14, 15, 16]), + }, + { + "role": "user", + "token_ids": torch.tensor([17, 18, 19, 20, 21, 22, 23, 24]), + "images": PackedTensor(torch.randn(3, 128, 128), dim_to_pack=0), + }, + ] + message_log_2 = [ + { + "role": "user", + "token_ids": torch.tensor([1, 2, 3, 4, 5, 6, 7, 8]), + "images": PackedTensor(torch.randn(3, 128, 128), dim_to_pack=0), + }, + { + "role": "assistant", + "token_ids": torch.tensor([9, 10, 11, 12, 13, 14, 15, 16]), + }, + { + 
"role": "user", + "token_ids": torch.tensor([17, 18, 19, 20, 21, 22, 23, 24]), + }, + ] + # data spec + message_logs = BatchedDataDict( + { + "message_log": [message_log_1, message_log_2], + } + ) + flat_message, input_lengths = batched_message_log_to_flat_message( + message_logs["message_log"], + pad_value_dict={ + "token_ids": -1, + }, + ) + shards = flat_message.shard_by_batch_size(shards=2) + assert len(shards) == 2 + assert tuple(shards[0]["images"].as_tensor().shape) == (6, 128, 128) + assert tuple(shards[1]["images"].as_tensor().shape) == (3, 128, 128) + + +def test_sequence_packing_with_packed_data(): + """Test sequence packing with packed multimodal data.""" + # Create sample data + text_tensor = torch.tensor( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) + image_tensors = [torch.randn(2**i, 1176) for i in range(4)] + + # Create packed image data + packed_batch = PackedTensor(image_tensors, dim_to_pack=0) + + # Create BatchedDataDict + batch = BatchedDataDict( + { + "text_ids": text_tensor, + "image_features": packed_batch, + "sequence_lengths": torch.tensor([2, 3, 2, 4]), + } + ) + + sequence_packing_args = SequencePackingArgs( + max_tokens_per_microbatch=6, + input_key="text_ids", + input_lengths_key="sequence_lengths", + algorithm="modified_first_fit_decreasing", + sequence_length_pad_multiple=1, + ) + + # Test sequence packing + sharded_batches, sorted_indices = batch.shard_by_batch_size( + shards=2, sequence_packing_args=sequence_packing_args + ) + + # Verify basic structure + assert len(sharded_batches) == 2 + assert len(sorted_indices) == 4 + + print("sequence packing sorted indices", sorted_indices) + + # Verify each shard has the necessary attributes + for shard in sharded_batches: + assert hasattr(shard, "micro_batch_indices") + assert hasattr(shard, "micro_batch_lengths") + assert isinstance(shard["image_features"], PackedTensor) + + +def test_dynamic_batching_with_packed_data(): + """Test dynamic batching with packed 
multimodal data.""" + # Create sample data + text_tensor = torch.tensor( + [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ) + image_tensors = [torch.randn(2**i, 1176) for i in range(4)] + + # Create packed image data + packed_batch = PackedTensor(image_tensors, dim_to_pack=0) + + # Create BatchedDataDict + batch = BatchedDataDict( + { + "text_ids": text_tensor, + "image_features": packed_batch, + "sequence_lengths": torch.tensor([2, 3, 2, 4]), + } + ) + + dynamic_batching_args: DynamicBatchingArgs = { + "input_key": "text_ids", + "input_lengths_key": "sequence_lengths", + "sequence_length_round": 2, + "max_tokens_per_microbatch": 6, + } + + # Test dynamic batching + sharded_batches, sorted_indices = batch.shard_by_batch_size( + shards=2, dynamic_batching_args=dynamic_batching_args + ) + + print("dynamic batching sorted indices", sorted_indices) + + # Verify basic structure + assert len(sharded_batches) == 2 + assert len(sorted_indices) == 4 + + # Verify each shard has the necessary attributes + for shard in sharded_batches: + assert hasattr(shard, "micro_batch_indices") + assert hasattr(shard, "micro_batch_lengths") + assert isinstance(shard["image_features"], PackedTensor) + + +def test_multimodal_specific_functionality(): + """Test functionality specific to multimodal data handling. 
(length, device movement, as_tensor)""" + # Create sample data + text_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]]) + image_tensor = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]]) + + # Test PackedTensorItem + mm_data = PackedTensor(image_tensor, dim_to_pack=0) + assert isinstance(mm_data, PackedTensor) + assert torch.equal(mm_data.as_tensor(), image_tensor) + assert len(mm_data) == 1 + + # Test device movement + if torch.cuda.is_available(): + mm_data = mm_data.to("cuda") + assert mm_data.tensors[0].device.type == "cuda" + + # images differ along a different dimension + image_tensors = [torch.randn(3, 128, 128 + i) for i in range(2)] + + mm_batch = PackedTensor(image_tensors, dim_to_pack=0) + with pytest.raises(RuntimeError): + batch_tensor = mm_batch.as_tensor() + + # check for packing on correct dimension + image_tensors = [torch.randn(3 + 10**i, 128, 128) for i in range(2)] + mm_batch = PackedTensor(image_tensors, dim_to_pack=0) + mm_tensor = mm_batch.as_tensor() + + expected_dim = sum([3 + 10**i for i in range(2)]) + assert mm_tensor.shape == (expected_dim, 128, 128) + + +def test_get_multimodal_dict(): + """Test the get_multimodal_dict functionality.""" + # Create sample data + text_tensor = torch.tensor([[1, 2, 3], [4, 5, 6]]) + image_tensor = torch.tensor([[[1.0, 2.0]], [[3.0, 4.0]]]) + token_type_ids = torch.tensor([[1, 1, 1], [1, 1, 1]]) + + # Create packed image data + packed_image = PackedTensor(image_tensor, dim_to_pack=0) + + # Create BatchedDataDict + batch = BatchedDataDict( + { + "text_ids": text_tensor, + "image_features": packed_image, + "token_type_ids": token_type_ids, # Special key that should be included + } + ) + + # Test getting multimodal dict as tensors + mm_dict = batch.get_multimodal_dict(as_tensors=True) + assert "image_features" in mm_dict + assert "token_type_ids" in mm_dict + assert torch.is_tensor(mm_dict["image_features"]) + assert torch.is_tensor(mm_dict["token_type_ids"]) + assert "text_ids" not in mm_dict # Regular tensors should 
not be included + + # Test getting multimodal dict as packed items + mm_dict = batch.get_multimodal_dict(as_tensors=False) + assert "image_features" in mm_dict + assert "token_type_ids" in mm_dict + assert isinstance(mm_dict["image_features"], PackedTensor) + assert torch.is_tensor(mm_dict["token_type_ids"]) + + +def test_packedtensor_all_none(): + pt = PackedTensor([None, None], dim_to_pack=0) + assert pt.as_tensor() is None + + +def test_packedtensor_with_none_entry(): + original = PackedTensor([torch.randn(2, 3), None], dim_to_pack=0) + empty = PackedTensor.empty_like(original) + # same logical length + assert len(empty) == len(original) + # all entries are None, thus as_tensor returns None + assert empty.as_tensor() is None + + +def test_packedtensor_to_with_none_entry(): + t = torch.randn(1, 2) + pt = PackedTensor([None, t], dim_to_pack=0) + pt = pt.to("cpu") + assert pt.tensors[0] is None + assert isinstance(pt.tensors[1], torch.Tensor) + assert pt.tensors[1].device.type == "cpu" + + +def test_packedtensor_as_tensor_with_mixed_none_and_tensors(): + t1 = torch.randn(2, 3) + t2 = None + t3 = torch.randn(4, 3) + pt = PackedTensor([t1, t2, t3], dim_to_pack=0) + out = pt.as_tensor() + expected = torch.cat([t1, t3], dim=0) + assert torch.equal(out, expected) diff --git a/tests/unit/distributed/test_batched_data_dict.py b/tests/unit/distributed/test_batched_data_dict.py index 539f2fab2b..9c982c1a11 100644 --- a/tests/unit/distributed/test_batched_data_dict.py +++ b/tests/unit/distributed/test_batched_data_dict.py @@ -14,6 +14,7 @@ import pytest import torch +from nemo_rl.data.multimodal_utils import PackedTensor from nemo_rl.distributed.batched_data_dict import ( BatchedDataDict, DynamicBatchingArgs, @@ -469,6 +470,152 @@ def test_sequence_packing_with_dynamic_batching_conflict(): ) +def test_shard_by_batch_size_with_packed_multimodal(): + """Sharding should slice PackedTensor items correctly and preserve types.""" + text = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 
9], [10, 11, 12]]) + images = [ + torch.randn(2, 3, 8, 8), + torch.randn(3, 3, 8, 8), + torch.randn(1, 3, 8, 8), + torch.randn(5, 3, 8, 8), + ] + packed = PackedTensor(images, dim_to_pack=0) + batch = BatchedDataDict( + { + "input_ids": text, + "pixel_values": packed, + "labels": [0, 1, 2, 3], + } + ) + + shards = batch.shard_by_batch_size(shards=2) + assert len(shards) == 2 + # First shard should contain first two items + assert torch.equal(shards[0]["input_ids"], torch.tensor([[1, 2, 3], [4, 5, 6]])) + assert isinstance(shards[0]["pixel_values"], PackedTensor) + assert len(shards[0]["pixel_values"]) == 2 + assert shards[0]["labels"] == [0, 1] + # Packed lengths along dim 0: 2 + 3 + assert tuple(shards[0]["pixel_values"].as_tensor().shape) == (5, 3, 8, 8) + # Second shard should contain last two items + assert torch.equal(shards[1]["input_ids"], torch.tensor([[7, 8, 9], [10, 11, 12]])) + assert isinstance(shards[1]["pixel_values"], PackedTensor) + assert len(shards[1]["pixel_values"]) == 2 + assert shards[1]["labels"] == [2, 3] + # Packed lengths along dim 0: 1 + 5 + assert tuple(shards[1]["pixel_values"].as_tensor().shape) == (6, 3, 8, 8) + + +def test_get_multimodal_dict_mixed_content_and_device_move(): + """get_multimodal_dict should include PackedTensor and optional keys, and support device movement.""" + images = [torch.randn(2, 3, 8, 8), torch.randn(1, 3, 8, 8)] + packed = PackedTensor(images, dim_to_pack=0) + token_type_ids = torch.ones(2, 4, dtype=torch.long) + regular = torch.arange(2) + + batch = BatchedDataDict( + { + "pixel_values": packed, + "token_type_ids": token_type_ids, + "regular_tensor": regular, + "labels": [0, 1], + } + ) + + # as tensors + mm_dict_t = batch.get_multimodal_dict(as_tensors=True) + assert set(mm_dict_t.keys()) == {"pixel_values", "token_type_ids"} + assert ( + torch.is_tensor(mm_dict_t["pixel_values"]) + and mm_dict_t["pixel_values"].shape[0] == 3 + ) + assert torch.is_tensor(mm_dict_t["token_type_ids"]) and tuple( + 
mm_dict_t["token_type_ids"].shape + ) == (2, 4) + + # as packed + mm_dict_p = batch.get_multimodal_dict(as_tensors=False) + assert isinstance(mm_dict_p["pixel_values"], PackedTensor) + + # move device + device = "cuda" if torch.cuda.is_available() else "cpu" + moved = BatchedDataDict({"pixel_values": packed}).to(device) + mm_after_move = moved.get_multimodal_dict(as_tensors=True) + assert torch.is_tensor(mm_after_move["pixel_values"]) and mm_after_move[ + "pixel_values" + ].device.type == ("cuda" if torch.cuda.is_available() else "cpu") + + +def test_from_batches_pads_3d_tensors_along_sequence_dim(): + """from_batches should pad 3D tensors along the sequence dimension before stacking.""" + + pad_value = -5.0 + batch1 = BatchedDataDict( + { + "teacher_logits": torch.tensor( + [ + [[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], + [[7.0, 8.0], [9.0, 10.0], [11.0, 12.0]], + ], + dtype=torch.float32, + ) + } + ) + batch2 = BatchedDataDict( + { + "teacher_logits": torch.tensor( + [ + [ + [13.0, 14.0], + [15.0, 16.0], + [17.0, 18.0], + [19.0, 20.0], + [21.0, 22.0], + ], + [ + [23.0, 24.0], + [25.0, 26.0], + [27.0, 28.0], + [29.0, 30.0], + [31.0, 32.0], + ], + ], + dtype=torch.float32, + ) + } + ) + + stacked = BatchedDataDict.from_batches( + [batch1, batch2], pad_value_dict={"teacher_logits": pad_value} + ) + + stacked_logits = stacked["teacher_logits"] + assert stacked_logits.shape == (4, 5, 2) + + expected_batch1 = torch.tensor( + [ + [ + [1.0, 2.0], + [3.0, 4.0], + [5.0, 6.0], + [pad_value, pad_value], + [pad_value, pad_value], + ], + [ + [7.0, 8.0], + [9.0, 10.0], + [11.0, 12.0], + [pad_value, pad_value], + [pad_value, pad_value], + ], + ], + dtype=torch.float32, + ) + expected = torch.cat([expected_batch1, batch2["teacher_logits"]], dim=0) + + assert torch.equal(stacked_logits, expected) + + @pytest.mark.parametrize("pad_to_multiple_of", [1, 32, 64, 256]) def test_sequence_packing_microbatch_boundaries(pad_to_multiple_of): """Test that microbatch boundaries are correctly 
maintained across chunks with random sequences.""" diff --git a/tests/unit/distributed/test_model_utils.py b/tests/unit/distributed/test_model_utils.py index 2f8ef2011a..8637ad22fe 100644 --- a/tests/unit/distributed/test_model_utils.py +++ b/tests/unit/distributed/test_model_utils.py @@ -18,6 +18,8 @@ import torch from nemo_rl.distributed.model_utils import ( + ChunkedDistributedGatherLogprob, + ChunkedDistributedLogprob, DistributedLogprob, _compute_distributed_log_softmax, _get_tokens_on_this_cp_rank, @@ -426,10 +428,177 @@ def test_allgather_cp_sharded_tensor(register_allgather_cp_test_actor, cp_size): cluster.shutdown() +@ray.remote(num_gpus=1) +class ChunkedGatherLogprobTestActor: + def __init__(self, tp_size, chunk_size, inference_only, sharding): + self.tp_size = tp_size + self.chunk_size = chunk_size + self.inference_only = inference_only + self.sharding = sharding + self.env_vars = dict(os.environ) + + def test_chunked_gather_logprob(self): + torch.distributed.init_process_group(backend="nccl") + + rank = int(os.environ["RANK"]) + # TP-only: world_size == tp_size when cp_size == 1 + tp_rank = rank + tp_group = torch.distributed.new_group(ranks=list(range(self.tp_size))) + + batch_size = 2 + seq_len = 16 + vocab_size = 256 + gather_k = 3 + + torch.manual_seed(1337) + full_logits = torch.randn(batch_size, seq_len, vocab_size, device="cuda") + global_indices = torch.randint( + low=0, high=vocab_size, size=(batch_size, seq_len, gather_k), device="cuda" + ) + + vocab_part_size = vocab_size // self.tp_size + vocab_start_index = tp_rank * vocab_part_size + vocab_end_index = (tp_rank + 1) * vocab_part_size + + baseline_logits = ( + full_logits.clone().detach().requires_grad_(not self.inference_only) + ) + baseline_log_probs = torch.nn.functional.log_softmax(baseline_logits, dim=-1) + baseline_selected = torch.gather( + baseline_log_probs, dim=-1, index=global_indices + ) + + if not self.inference_only: + torch.gather( + baseline_log_probs, dim=-1, 
index=global_indices + ).sum().backward() + baseline_grad = baseline_logits.grad[ + :, :, vocab_start_index:vocab_end_index + ] + + local_logits = full_logits[:, :, vocab_start_index:vocab_end_index] + local_logits = ( + local_logits.clone().detach().requires_grad_(not self.inference_only) + ) + + gathered = ChunkedDistributedGatherLogprob.apply( + local_logits, + global_indices, + vocab_start_index, + vocab_end_index, + self.chunk_size, + tp_group, + self.inference_only, + ) + + torch.testing.assert_close(gathered, baseline_selected, rtol=1e-4, atol=1e-4) + + forward_diff = torch.max(torch.abs(gathered - baseline_selected)).item() + + if not self.inference_only: + gathered.sum().backward() + grad_local = local_logits.grad + torch.testing.assert_close(grad_local, baseline_grad, rtol=1e-4, atol=1e-4) + grad_diff = torch.max(torch.abs(grad_local - baseline_grad)).item() + else: + grad_diff = None + + return { + "forward_max_diff": forward_diff, + "grad_max_diff": grad_diff, + } + + +CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN = ( + f"{ChunkedGatherLogprobTestActor.__module__}.ChunkedGatherLogprobTestActor" +) + + +@pytest.fixture +def register_chunked_gather_logprob_test_actor(): + original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get( + CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN + ) + ACTOR_ENVIRONMENT_REGISTRY[CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN] = ( + PY_EXECUTABLES.SYSTEM + ) + + yield CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN + + if CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN in ACTOR_ENVIRONMENT_REGISTRY: + if original_registry_value is None: + del ACTOR_ENVIRONMENT_REGISTRY[CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN] + else: + ACTOR_ENVIRONMENT_REGISTRY[CHUNKED_GATHER_LOGPROB_TEST_ACTOR_FQN] = ( + original_registry_value + ) + + +@pytest.mark.parametrize( + "tp_size, chunk_size, inference_only", + [ + (1, 5, False), + (2, 4, False), + (1, 3, True), + ], +) +def test_chunked_distributed_gather_logprob( + register_chunked_gather_logprob_test_actor, tp_size, chunk_size, 
inference_only +): + world_size = tp_size + + if not torch.cuda.is_available() or torch.cuda.device_count() < world_size: + pytest.skip( + f"Not enough GPUs available. Need {world_size}, got {torch.cuda.device_count()}" + ) + + cluster = RayVirtualCluster(bundle_ct_per_node_list=[world_size], use_gpus=True) + + try: + actor_fqn = register_chunked_gather_logprob_test_actor + + sharding = NamedSharding( + layout=np.arange(world_size).reshape(tp_size), names=["tp"] + ) + builder = RayWorkerBuilder( + actor_fqn, tp_size, chunk_size, inference_only, sharding + ) + + worker_group = RayWorkerGroup( + cluster=cluster, + remote_worker_builder=builder, + workers_per_node=None, + sharding_annotations=sharding, + ) + + futures = worker_group.run_all_workers_single_data( + "test_chunked_gather_logprob" + ) + results = ray.get(futures) + + for i, result in enumerate(results): + assert result["forward_max_diff"] < 1e-4, ( + f"Worker {i} forward diff too large: {result['forward_max_diff']}" + ) + if not inference_only: + assert ( + result["grad_max_diff"] is not None + and result["grad_max_diff"] < 1e-4 + ), f"Worker {i} grad diff too large: {result['grad_max_diff']}" + else: + assert result["grad_max_diff"] is None + + worker_group.shutdown(force=True) + + finally: + cluster.shutdown() + + @ray.remote(num_gpus=1) class DistributedLogprobTestActor: - def __init__(self, tp_size): + def __init__(self, tp_size, chunk_size): self.tp_size = tp_size + self.chunk_size = chunk_size self.env_vars = dict(os.environ) torch.distributed.init_process_group(backend="nccl") self.tp_group = torch.distributed.new_group(ranks=list(range(tp_size))) @@ -455,6 +624,7 @@ def test_distributed_logprob_forward_and_backward(self): seq_len = 8 full_vocab_size = 1024 vocab_part_size = full_vocab_size // self.tp_size + chunk_size = self.chunk_size # Calculate vocab partition for this rank vocab_start_index = rank * vocab_part_size @@ -490,14 +660,25 @@ def test_distributed_logprob_forward_and_backward(self): ) 
# Compute using DistributedLogprob (forward only first) - distributed_log_probs_inference = DistributedLogprob.apply( - vocab_parallel_logits.clone().detach(), # Clone to avoid affecting backward test - target, - vocab_start_index, - vocab_end_index, - self.tp_group, - True, # inference_only=True for forward test - ) + if chunk_size is not None: + distributed_log_probs_inference = ChunkedDistributedLogprob.apply( + vocab_parallel_logits.clone().detach(), # Clone to avoid affecting backward test + target, + vocab_start_index, + vocab_end_index, + chunk_size, + self.tp_group, + True, # inference_only=True for forward test + ) + else: + distributed_log_probs_inference = DistributedLogprob.apply( + vocab_parallel_logits.clone().detach(), # Clone to avoid affecting backward test + target, + vocab_start_index, + vocab_end_index, + self.tp_group, + True, # inference_only=True for forward test + ) # Compare forward results torch.testing.assert_close( @@ -700,9 +881,17 @@ def register_distributed_logprob_test_actor(): ) -@pytest.mark.parametrize("tp_size", [1, 2]) +@pytest.mark.parametrize( + "tp_size, chunk_size", + [ + (1, None), + (2, None), + (1, 4), + (2, 4), + ], +) def test_distributed_logprob_all_tests( - register_distributed_logprob_test_actor, tp_size + register_distributed_logprob_test_actor, tp_size, chunk_size ): """Test all DistributedLogprob functionality for a given TP size.""" # Skip if not enough GPUs @@ -718,7 +907,7 @@ def test_distributed_logprob_all_tests( # Create sharding for TP sharding = NamedSharding(layout=list(range(tp_size)), names=["tp"]) - builder = RayWorkerBuilder(actor_fqn, tp_size) + builder = RayWorkerBuilder(actor_fqn, tp_size, chunk_size) worker_group = RayWorkerGroup( cluster=cluster, @@ -728,7 +917,9 @@ def test_distributed_logprob_all_tests( ) # Test 1: Combined Forward and Backward pass - print(f"\n=== Testing TP={tp_size}: Forward & Backward Pass ===") + print( + f"\n=== Testing TP={tp_size} ChunkSize={chunk_size}: Forward & 
Backward Pass ===" + ) futures = worker_group.run_all_workers_single_data( "test_distributed_logprob_forward_and_backward" ) @@ -743,7 +934,7 @@ def test_distributed_logprob_all_tests( ) # Test 2: Log softmax function - print(f"\n=== Testing TP={tp_size}: Log Softmax ===") + print(f"\n=== Testing TP={tp_size} ChunkSize={chunk_size}: Log Softmax ===") futures = worker_group.run_all_workers_single_data( "test_distributed_log_softmax" ) @@ -756,7 +947,7 @@ def test_distributed_logprob_all_tests( # Test 3: Edge cases (only for TP=2) if tp_size == 2: - print(f"\n=== Testing TP={tp_size}: Edge Cases ===") + print(f"\n=== Testing TP={tp_size} ChunkSize={chunk_size}: Edge Cases ===") futures = worker_group.run_all_workers_single_data("test_edge_cases") results = ray.get(futures) print("Edge cases test completed successfully") diff --git a/tests/unit/distributed/test_virtual_cluster.py b/tests/unit/distributed/test_virtual_cluster.py index 405082b6b9..7695c09a38 100644 --- a/tests/unit/distributed/test_virtual_cluster.py +++ b/tests/unit/distributed/test_virtual_cluster.py @@ -214,7 +214,7 @@ def test_mcore_py_executable(): venv_python, "-c", # Importing nemo_rl must be first to ensure all of megatron is importable - "import nemo_rl; print('nemo_rl is imported'); import transformer_engine.pytorch as te; print('te is imported'); import nemo.tron; print('nemo-tron is imported'); import megatron.core; print('megatron-core is imported'); import megatron.training; print('megatron-training is imported');", + "import nemo_rl; print('nemo_rl is imported'); import transformer_engine.pytorch as te; print('te is imported'); import megatron.bridge; print('megatron-bridge is imported'); import megatron.core; print('megatron-core is imported'); import megatron.training; print('megatron-training is imported');", ], capture_output=True, text=True, @@ -226,6 +226,23 @@ def test_mcore_py_executable(): ) assert "nemo_rl is imported" in result.stdout assert "te is imported" in result.stdout - 
assert "nemo-tron is imported" in result.stdout + assert "megatron-bridge is imported" in result.stdout assert "megatron-core is imported" in result.stdout assert "megatron-training is imported" in result.stdout + + +def test_create_sorted_bundle_indices_for_unified_pg(): + """Test that sorted bundle indices are created for a unified placement group.""" + cluster = RayVirtualCluster(bundle_ct_per_node_list=[2], use_gpus=True) + cluster._init_placement_groups(strategy=None, use_unified_pg=True) + assert cluster._sorted_bundle_indices is not None + assert len(cluster._sorted_bundle_indices) == 2 + assert 0 in cluster._sorted_bundle_indices + assert 1 in cluster._sorted_bundle_indices + + +def test_not_create_sorted_bundle_indices_for_per_node_pg(): + """Test that sorted bundle indices are not created for a per-node placement group.""" + cluster = RayVirtualCluster(bundle_ct_per_node_list=[2], use_gpus=True) + cluster._init_placement_groups(strategy=None, use_unified_pg=False) + assert cluster._sorted_bundle_indices is None diff --git a/tests/unit/distributed/test_worker_groups.py b/tests/unit/distributed/test_worker_groups.py index 57976a94d8..04daa4335f 100644 --- a/tests/unit/distributed/test_worker_groups.py +++ b/tests/unit/distributed/test_worker_groups.py @@ -104,6 +104,43 @@ def configure_worker(num_gpus, bundle_indices): return resources, env_vars_update, init_kwargs_update +@ray.remote( + runtime_env={ + "env_vars": { + "TEST_VAR_1": "ray_remote_runtime_value", + "TEST_VAR_3": "ray_remote_runtime_value", + "RAY_REMOTE_VAR": "ray_remote_only", + } + } +) +class PrecedenceActor: + """Actor with configure_worker method that sets environment variables.""" + + _default_options = {} + + def __init__(self, *args, **kwargs): + self.init_args = args + self.init_kwargs = kwargs + self.env_vars = dict(os.environ) + + def get_env_var(self, var_name): + return self.env_vars.get(var_name) + + def get_all_env_vars(self): + return dict(self.env_vars) + + @classmethod + def 
configure_worker(cls, num_gpus, bundle_indices=None): + return ( + {"num_gpus": num_gpus}, # resources + { + "TEST_VAR_1": "configure_worker_value", + "WORKER_VAR": "worker_only", + }, # env_vars + {}, # init_kwargs + ) + + MY_TEST_ACTOR_FQN = f"{MyTestActor.__module__}.MyTestActor" @@ -138,7 +175,6 @@ def check_nsight_config(self): def register_test_actor(request): # Default to PY_EXECUTABLES.SYSTEM if no param is given py_exec_to_register = getattr(request, "param", PY_EXECUTABLES.SYSTEM) - original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get(MY_TEST_ACTOR_FQN) ACTOR_ENVIRONMENT_REGISTRY[MY_TEST_ACTOR_FQN] = py_exec_to_register @@ -152,6 +188,27 @@ def register_test_actor(request): ACTOR_ENVIRONMENT_REGISTRY[MY_TEST_ACTOR_FQN] = original_registry_value +# Create fixtures for each actor class +@pytest.fixture +def register_precedence_actor(request): + """Fixture for ConfigureWorkerActor.""" + # Default to PY_EXECUTABLES.SYSTEM if no param is given + py_exec_to_register = getattr(request, "param", PY_EXECUTABLES.SYSTEM) + actor_fqn = f"{PrecedenceActor.__module__}.PrecedenceActor" + + original_registry_value = ACTOR_ENVIRONMENT_REGISTRY.get(actor_fqn) + ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] = py_exec_to_register + + yield actor_fqn # Provide the FQN to the test + + # Clean up: revert ACTOR_ENVIRONMENT_REGISTRY to its original state for this FQN + if actor_fqn in ACTOR_ENVIRONMENT_REGISTRY: # Check if key still exists + if original_registry_value is None: + del ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] + else: + ACTOR_ENVIRONMENT_REGISTRY[actor_fqn] = original_registry_value + + @pytest.fixture def virtual_cluster(): # 1 node, 2 CPU bundles. use_gpus=False means num_gpus passed to workers will be 0. 
@@ -1063,3 +1120,75 @@ def test_get_nsight_config_output_format(): assert "env_vars" in combined_runtime_env_no_match assert "py_executable" in combined_runtime_env_no_match assert "nsight" not in combined_runtime_env_no_match + + +# Environment Variable Precedence Test +def test_environment_variable_precedence_full( + register_precedence_actor, virtual_cluster +): + """Test that the order of precedence is as follows (from highest to lowest): + - configure_worker + - RayWorkerGroup + - system + - @ray.remote runtime_env + """ + # Set up system environment variables + original_env = dict(os.environ) + os.environ["TEST_VAR_1"] = "system_value" + os.environ["TEST_VAR_2"] = "system_value" + os.environ["TEST_VAR_3"] = "system_value" + os.environ["TEST_VAR_4"] = "system_value" + + try: + # Create a worker builder with configure_worker method + builder = RayWorkerBuilder(register_precedence_actor) + + # Create RayWorkerGroup environment variables + env_vars = { + "TEST_VAR_1": "yaml_worker_group_value", + "TEST_VAR_2": "yaml_worker_group_value", + } + + # Create worker group + worker_group = RayWorkerGroup( + cluster=virtual_cluster, + remote_worker_builder=builder, + workers_per_node=1, + ## passing env_vars here mimics passing env vars from yaml config + ## because lm_policy automicatically passes env vars to RayWorkerGroup + env_vars=env_vars, + ) + + assert len(worker_group.workers) == 1 + worker = worker_group.workers[0] + + # Verify configure_worker has highest precedence + assert ( + ray.get(worker.get_env_var.remote("TEST_VAR_1")) == "configure_worker_value" + ) # configure_worker overrides all + assert ( + ray.get(worker.get_env_var.remote("TEST_VAR_2")) + == "yaml_worker_group_value" + ) # RayWorkerGroup value preserved + assert ( + ray.get(worker.get_env_var.remote("TEST_VAR_3")) == "system_value" + ) # system value takes precedence over worker env vars + assert ( + ray.get(worker.get_env_var.remote("TEST_VAR_4")) == "system_value" + ) # system value preserved 
+ assert ( + ray.get(worker.get_env_var.remote("WORKER_VAR")) == "worker_only" + ) # configure_worker value set + assert ( + ray.get(worker.get_env_var.remote("RAY_REMOTE_VAR")) == "ray_remote_only" + ) # ray.remote runtime_env value preserved + + worker_group.shutdown(force=True) + + finally: + # Restore original environment + for key in ["TEST_VAR_1", "TEST_VAR_2", "TEST_VAR_3", "TEST_VAR_4"]: + if key in original_env: + os.environ[key] = original_env[key] + else: + os.environ.pop(key, None) diff --git a/tests/unit/environments/penguin_test_data/test_penguin_sanity.json b/tests/unit/environments/penguin_test_data/test_penguin_sanity.json new file mode 100644 index 0000000000..4a15173e9c --- /dev/null +++ b/tests/unit/environments/penguin_test_data/test_penguin_sanity.json @@ -0,0 +1 @@ +{"input": [{"id": 0, "responses_create_params": {"input": [{"role": "system", "content": "# Instructions\nYou are an extraction agent. You will be provided a user query and you need to use the tools provided to you to extract list of synonym values. You will be provided with a bunch of synonyms for each. For each term, please see if it's relevant to the user query and get the values for each synonym as appropriate. You must get and extract the values for every synonym that appears in this list. 
Please output synonym values in the order they appear in the available synonyms below.\n\n# Available synonyms\nThe term 'Win' has a synonym 'Outperform'.\nThe term 'Empty' has a synonym 'Desolate'.\nThe term 'Open' has a synonym 'Revealed'.\nThe term 'Dry' has a synonym 'Arid'.\nThe term 'Bad' has a synonym 'Wicked'.\nThe term 'Retreat' has a synonym 'Draw back'.\nThe term 'Empty' has a synonym 'Vacant'.\nThe term 'Thick' has a synonym 'Fat'.\nThe term 'Stormy' has a synonym 'Gale-force'.\nThe term 'Rough' has a synonym 'Coarse'.\nThe term 'Day' has a synonym 'Afternoon'.\nThe term 'Quiet' has a synonym 'Hushed'.\nThe term 'Day' has a synonym 'Sunrise'.\nThe term 'Closed' has a synonym 'Covered'.\nThe term 'Early' has a synonym 'Preliminary'.\nThe term 'Night' has a synonym 'Midnight'.\nThe term 'Light' has a synonym 'Clear'.\nThe term 'Wide' has a synonym 'Comprehensive'.\nThe term 'Ugly' has a synonym 'Grotesque'.\nThe term 'Insult' has a synonym 'Belittle'.\nThe term 'Far' has a synonym 'Remote'.\nThe term 'Up' has a synonym 'Higher'.\nThe term 'Stormy' has a synonym 'Tempestuous'.\nThe term 'Dead' has a synonym 'Deceased'.\nThe term 'Dim' has a synonym 'Faint'.\nThe term 'Thick' has a synonym 'Heavy'.\nThe term 'Failure' has a synonym 'Loss'.\nThe term 'Sad' has a synonym 'Depressed'.\nThe term 'Thin' has a synonym 'Slender'.\nThe term 'Dry' has a synonym 'Dehydrated'.\nThe term 'Dirty' has a synonym 'Muddy'.\nThe term 'Fast' has a synonym 'Brisk'.\nThe term 'Defeat' has a synonym 'Failure'.\nThe term 'Sharp' has a synonym 'Tapered'.\nThe term 'Sharp' has a synonym 'Piercing'.\nThe term 'Rough' has a synonym 'Grainy'.\nThe term 'Cowardly' has a synonym 'Craven'.\nThe term 'False' has a synonym 'Incorrect'.\nThe term 'Sad' has a synonym 'Unhappy'.\nThe term 'Brave' has a synonym 'Stouthearted'.\nThe term 'Cowardly' has a synonym 'Yellow'.\nThe term 'Thin' has a synonym 'Skinny'.\nThe term 'Outside' has a synonym 'External'.\nThe term 'Wet' has a synonym 
'Soaked'.\nThe term 'Sad' has a synonym 'Heartbroken'.\nThe term 'Success' has a synonym 'Victory'.\nThe term 'Cowardly' has a synonym 'Spineless'.\nThe term 'Tight' has a synonym 'Compact'.\nThe term 'Strong' has a synonym 'Muscular'.\nThe term 'Difficult' has a synonym 'Demanding'.\nThe term 'Old' has a synonym 'Historic'.\nThe term 'Rich' has a synonym 'Opulent'.\nThe term 'Far' has a synonym 'Away'.\nThe term 'Easy' has a synonym 'Effortless'.\nThe term 'Short' has a synonym 'Little'.\nThe term 'Win' has a synonym 'Achieve'.\nThe term 'Compliment' has a synonym 'Extol'.\nThe term 'Advance' has a synonym 'Rise'.\nThe term 'Soft' has a synonym 'Tender'.\nThe term 'Narrow' has a synonym 'Restrictive'.\nThe term 'Dark' has a synonym 'Dusky'.\nThe term 'High' has a synonym 'Prominent'.\nThe term 'Calm' has a synonym 'Undisturbed'.\nThe term 'Closed' has a synonym 'Locked'.\nThe term 'Compulsory' has a synonym 'Statutory'.\nThe term 'Alive' has a synonym 'Awake'.\nThe term 'Weak' has a synonym 'Powerless'.\nThe term 'Difficult' has a synonym 'Grueling'.\nThe term 'Reject' has a synonym 'Rebuff'.\nThe term 'Slow' has a synonym 'Leisurely'.\nThe term 'Clean' has a synonym 'Unsoiled'.\nThe term 'Compulsory' has a synonym 'Obligatory'.\nThe term 'Short' has a synonym 'Diminutive'.\nThe term 'Night' has a synonym 'Nightfall'.\nThe term 'Near' has a synonym 'Proximate'.\nThe term 'Ugly' has a synonym 'Homely'.\nThe term 'Wrong' has a synonym 'Amiss'.\nThe term 'Bad' has a synonym 'Terrible'.\nThe term 'Visible' has a synonym 'Noticeable'.\nThe term 'Near' has a synonym 'In proximity'.\nThe term 'Cold' has a synonym 'Frosty'.\nThe term 'Wrong' has a synonym 'False'.\nThe term 'Soft' has a synonym 'Velvety'.\nThe term 'Day' has a synonym 'Bright'.\nThe term 'Young' has a synonym 'Budding'.\nThe term 'Smelly' has a synonym 'Rancid'.\nThe term 'Low' has a synonym 'Diminished'.\nThe term 'Small' has a synonym 'Microscopic'.\nThe term 'Calm' has a synonym 'Unruffled'.\nThe term 
'Empty' has a synonym 'Void'.\nThe term 'Open' has a synonym 'Available'.\nThe term 'Far' has a synonym 'Removed'.\nThe term 'Young' has a synonym 'New'.\nThe term 'Ascend' has a synonym 'Mount'.\nThe term 'Ugly' has a synonym 'Hideous'.\nThe term 'Weak' has a synonym 'Frail'.\nThe term 'Wet' has a synonym 'Damp'.\nThe term 'Tall' has a synonym 'Sky-high'.\nThe term 'Down' has a synonym 'Depressed'.\nThe term 'Happy' has a synonym 'Cheerful'.\nThe term 'Alive' has a synonym 'Alert'.\nThe term 'Easy' has a synonym 'Light'.\nThe term 'Accept' has a synonym 'Receive'.\nThe term 'Advance' has a synonym 'Headway'.\nThe term 'Dim' has a synonym 'Dull'.\nThe term 'Tall' has a synonym 'Towering'.\nThe term 'Fragrant' has a synonym 'Balmy'.\nThe term 'Happy' has a synonym 'Pleased'.\nThe term 'Down' has a synonym 'Drop'.\nThe term 'Hard' has a synonym 'Rigid'.\nThe term 'Loud' has a synonym 'Noisy'.\nThe term 'Light' has a synonym 'Shiny'.\nThe term 'Early' has a synonym 'Prior'.\nThe term 'Hot' has a synonym 'Blazing'.\nThe term 'Light (weight)' has a synonym 'Slim'.\nThe term 'Accept' has a synonym 'Acknowledge'.\nThe term 'Quiet' has a synonym 'Peaceful'.\nThe term 'Outside' has a synonym 'Outdoors'.\nThe term 'Easy' has a synonym 'Painless'.\nThe term 'Success' has a synonym 'Conquest'.\nThe term 'Hard' has a synonym 'Solid'.\nThe term 'Failure' has a synonym 'Setback'.\nThe term 'Low' has a synonym 'Short'.\nThe term 'Late' has a synonym 'Overdue'.\nThe term 'Wet' has a synonym 'Waterlogged'.\nThe term 'Strong' has a synonym 'Forceful'.\nThe term 'Hot' has a synonym 'Warm'.\nThe term 'Dark' has a synonym 'Tenebrous'.\nThe term 'Light (weight)' has a synonym 'Flimsy'.\nThe term 'Smelly' has a synonym 'Pungent'.\nThe term 'Soft' has a synonym 'Mild'.\nThe term 'Early' has a synonym 'First'.\nThe term 'Dirty' has a synonym 'Squalid'.\nThe term 'Dead' has a synonym 'Lifeless'.\nThe term 'Bitter' has a synonym 'Astringent'.\nThe term 'False' has a synonym 'Fallacious'.\nThe 
term 'Defeat' has a synonym 'Collapse'.\nThe term 'Loud' has a synonym 'Blaring'.\nThe term 'Dull' has a synonym 'Dim'.\nThe term 'Stormy' has a synonym 'Wild'.\nThe term 'Narrow' has a synonym 'Compressed'.\nThe term 'Rich' has a synonym 'Flush'.\nThe term 'Invisible' has a synonym 'Obscured'.\nThe term 'Slow' has a synonym 'Dragging'.\nThe term 'Young' has a synonym 'Juvenile'.\nThe term 'Bitter' has a synonym 'Caustic'.\nThe term 'Old' has a synonym 'Elderly'.\nThe term 'Slow' has a synonym 'Sluggish'.\nThe term 'Ascend' has a synonym 'Go up'.\nThe term 'Down' has a synonym 'Sink'.\nThe term 'Descend' has a synonym 'Subside'.\nThe term 'Small' has a synonym 'Little'.\nThe term 'High' has a synonym 'Soaring'.\nThe term 'Up' has a synonym 'Climb'.\nThe term 'Calm' has a synonym 'Relaxed'.\nThe term 'Rich' has a synonym 'Well-off'.\nThe term 'Light (weight)' has a synonym 'Breezy'.\nThe term 'Wrong' has a synonym 'Inaccurate'.\nThe term 'Dirty' has a synonym 'Soiled'.\nThe term 'Late' has a synonym 'Unpunctual'.\nThe term 'Quiet' has a synonym 'Low'.\nThe term 'Descend' has a synonym 'Dismount'.\nThe term 'Compliment' has a synonym 'Praise'.\nThe term 'Open' has a synonym 'Unsealed'.\nThe term 'Dull' has a synonym 'Blunt'.\nThe term 'Small' has a synonym 'Minor'.\nThe term 'Retreat' has a synonym 'Escape'.\nThe term 'Fast' has a synonym 'Hasty'.\nThe term 'Invisible' has a synonym 'Secret'.\nThe term 'Success' has a synonym 'Achievement'.\nThe term 'Retreat' has a synonym 'Fall back'.\nThe term 'Cold' has a synonym 'Icy'.\nThe term 'Hard' has a synonym 'Stiff'.\nThe term 'Insult' has a synonym 'Deride'.\nThe term 'Night' has a synonym 'Nocturne'.\nThe term 'Tight' has a synonym 'Firm'.\nThe term 'Accept' has a synonym 'Consent'.\nThe term 'Victory' has a synonym 'Supremacy'.\nThe term 'Old' has a synonym 'Vintage'.\nThe term 'Dry' has a synonym 'Desiccated'.\nThe term 'Narrow' has a synonym 'Pinched'.\nThe term 'Clean' has a synonym 'Sterile'.\nThe term 'Visible' 
has a synonym 'Perceptible'.\nThe term 'Victory' has a synonym 'Win'.\nThe term 'Advance' has a synonym 'Step up'.\nThe term 'Sharp' has a synonym 'Edged'.\nThe term 'Wide' has a synonym 'Outspread'.\nThe term 'Low' has a synonym 'Flat'.\nThe term 'Closed' has a synonym 'Fastened'.\nThe term 'False' has a synonym 'Untrue'.\nThe term 'Brave' has a synonym 'Bold'.\nThe term 'Reject' has a synonym 'Refuse'.\nThe term 'Fragrant' has a synonym 'Perfumed'.\n\n# Example\nFor example, if the user query is \"I'm very warm\", the term you should focus on is \"hot\". According to the synonyms above, the term \"hot\" has the synonyms \"Blazing\" and \"Warm\", in that order. You need to get synonym values for \"Blazing\" and \"Warm\", let's say those are 5 and 6 respectively, and extract the result of those synonym values i.e. [5, 6] with 5 (blazing) first then 6 (warm) since that is the order they appear in the list of synonyms above."}, {"role": "user", "content": "What factors contribute to a region experiencing extremely high temperatures, and how do these factors interact? 
Please call the get_synonym_value tool on Blazing and Warm, and then extract_synonym_values on the results."}], "tools": [{"type": "function", "name": "get_synonym_value", "description": "Get the synonym value for a synonym.\nThis operation returns a value that conforms to the following JSON Schema: {\"properties\": {\"synonym_value\": {\"type\": \"integer\", \"title\": \"Synonym Value\", \"description\": \"The value for this synonym.\"}}, \"type\": \"object\", \"required\": [\"synonym_value\"]}\n", "parameters": {"properties": {"synonym": {"type": "string", "title": "Synonym", "description": "The synonym to get the value for."}}, "type": "object", "required": ["synonym"], "additionalProperties": false}, "strict": true}, {"type": "function", "name": "extract_synonym_values", "description": "Extract the synonym values you retrieved for the term that is relevant to the user query.\nThis operation returns a value that conforms to the following JSON Schema: {\"properties\": {\"success\": {\"type\": \"boolean\", \"title\": \"Success\", \"description\": \"Success.\"}}, \"type\": \"object\", \"required\": [\"success\"]}\n", "parameters": {"properties": {"synonym_values": {"items": {"type": "integer"}, "type": "array", "title": "Synonym Values", "description": "The synonym values corresponding to the term for the user query."}}, "type": "object", "required": ["synonym_values"], "additionalProperties": false}, "strict": true}], "parallel_tool_calls": false}, "expected_synonyms": ["Blazing", "Warm"], "expected_synonym_values": [711, 407], "minefield_label": "Hot", "minefield_label_value": 299, "agent_ref": {"type": "responses_api_agents", "name": "example_multi_step_simple_agent"}}, {"id": 1, "responses_create_params": {"input": [{"role": "system", "content": "# Instructions\nYou are an extraction agent. You will be provided a user query and you need to use the tools provided to you to extract list of synonym values. You will be provided with a bunch of synonyms for each. 
For each term, please see if it's relevant to the user query and get the values for each synonym as appropriate. You must get and extract the values for every synonym that appears in this list. Please output synonym values in the order they appear in the available synonyms below.\n\n# Available synonyms\nThe term 'Win' has a synonym 'Outperform'.\nThe term 'Empty' has a synonym 'Desolate'.\nThe term 'Open' has a synonym 'Revealed'.\nThe term 'Dry' has a synonym 'Arid'.\nThe term 'Bad' has a synonym 'Wicked'.\nThe term 'Retreat' has a synonym 'Draw back'.\nThe term 'Empty' has a synonym 'Vacant'.\nThe term 'Thick' has a synonym 'Fat'.\nThe term 'Stormy' has a synonym 'Gale-force'.\nThe term 'Rough' has a synonym 'Coarse'.\nThe term 'Day' has a synonym 'Afternoon'.\nThe term 'Quiet' has a synonym 'Hushed'.\nThe term 'Day' has a synonym 'Sunrise'.\nThe term 'Closed' has a synonym 'Covered'.\nThe term 'Early' has a synonym 'Preliminary'.\nThe term 'Night' has a synonym 'Midnight'.\nThe term 'Light' has a synonym 'Clear'.\nThe term 'Wide' has a synonym 'Comprehensive'.\nThe term 'Ugly' has a synonym 'Grotesque'.\nThe term 'Insult' has a synonym 'Belittle'.\nThe term 'Far' has a synonym 'Remote'.\nThe term 'Up' has a synonym 'Higher'.\nThe term 'Stormy' has a synonym 'Tempestuous'.\nThe term 'Dead' has a synonym 'Deceased'.\nThe term 'Dim' has a synonym 'Faint'.\nThe term 'Thick' has a synonym 'Heavy'.\nThe term 'Failure' has a synonym 'Loss'.\nThe term 'Sad' has a synonym 'Depressed'.\nThe term 'Thin' has a synonym 'Slender'.\nThe term 'Dry' has a synonym 'Dehydrated'.\nThe term 'Dirty' has a synonym 'Muddy'.\nThe term 'Fast' has a synonym 'Brisk'.\nThe term 'Defeat' has a synonym 'Failure'.\nThe term 'Sharp' has a synonym 'Tapered'.\nThe term 'Sharp' has a synonym 'Piercing'.\nThe term 'Rough' has a synonym 'Grainy'.\nThe term 'Cowardly' has a synonym 'Craven'.\nThe term 'False' has a synonym 'Incorrect'.\nThe term 'Sad' has a synonym 'Unhappy'.\nThe term 'Brave' has a 
synonym 'Stouthearted'.\nThe term 'Cowardly' has a synonym 'Yellow'.\nThe term 'Thin' has a synonym 'Skinny'.\nThe term 'Outside' has a synonym 'External'.\nThe term 'Wet' has a synonym 'Soaked'.\nThe term 'Sad' has a synonym 'Heartbroken'.\nThe term 'Success' has a synonym 'Victory'.\nThe term 'Cowardly' has a synonym 'Spineless'.\nThe term 'Tight' has a synonym 'Compact'.\nThe term 'Strong' has a synonym 'Muscular'.\nThe term 'Difficult' has a synonym 'Demanding'.\nThe term 'Old' has a synonym 'Historic'.\nThe term 'Rich' has a synonym 'Opulent'.\nThe term 'Far' has a synonym 'Away'.\nThe term 'Easy' has a synonym 'Effortless'.\nThe term 'Short' has a synonym 'Little'.\nThe term 'Win' has a synonym 'Achieve'.\nThe term 'Compliment' has a synonym 'Extol'.\nThe term 'Advance' has a synonym 'Rise'.\nThe term 'Soft' has a synonym 'Tender'.\nThe term 'Narrow' has a synonym 'Restrictive'.\nThe term 'Dark' has a synonym 'Dusky'.\nThe term 'High' has a synonym 'Prominent'.\nThe term 'Calm' has a synonym 'Undisturbed'.\nThe term 'Closed' has a synonym 'Locked'.\nThe term 'Compulsory' has a synonym 'Statutory'.\nThe term 'Alive' has a synonym 'Awake'.\nThe term 'Weak' has a synonym 'Powerless'.\nThe term 'Difficult' has a synonym 'Grueling'.\nThe term 'Reject' has a synonym 'Rebuff'.\nThe term 'Slow' has a synonym 'Leisurely'.\nThe term 'Clean' has a synonym 'Unsoiled'.\nThe term 'Compulsory' has a synonym 'Obligatory'.\nThe term 'Short' has a synonym 'Diminutive'.\nThe term 'Night' has a synonym 'Nightfall'.\nThe term 'Near' has a synonym 'Proximate'.\nThe term 'Ugly' has a synonym 'Homely'.\nThe term 'Wrong' has a synonym 'Amiss'.\nThe term 'Bad' has a synonym 'Terrible'.\nThe term 'Visible' has a synonym 'Noticeable'.\nThe term 'Near' has a synonym 'In proximity'.\nThe term 'Cold' has a synonym 'Frosty'.\nThe term 'Wrong' has a synonym 'False'.\nThe term 'Soft' has a synonym 'Velvety'.\nThe term 'Day' has a synonym 'Bright'.\nThe term 'Young' has a synonym 
'Budding'.\nThe term 'Smelly' has a synonym 'Rancid'.\nThe term 'Low' has a synonym 'Diminished'.\nThe term 'Small' has a synonym 'Microscopic'.\nThe term 'Calm' has a synonym 'Unruffled'.\nThe term 'Empty' has a synonym 'Void'.\nThe term 'Open' has a synonym 'Available'.\nThe term 'Far' has a synonym 'Removed'.\nThe term 'Young' has a synonym 'New'.\nThe term 'Ascend' has a synonym 'Mount'.\nThe term 'Ugly' has a synonym 'Hideous'.\nThe term 'Weak' has a synonym 'Frail'.\nThe term 'Wet' has a synonym 'Damp'.\nThe term 'Tall' has a synonym 'Sky-high'.\nThe term 'Down' has a synonym 'Depressed'.\nThe term 'Happy' has a synonym 'Cheerful'.\nThe term 'Alive' has a synonym 'Alert'.\nThe term 'Easy' has a synonym 'Light'.\nThe term 'Accept' has a synonym 'Receive'.\nThe term 'Advance' has a synonym 'Headway'.\nThe term 'Dim' has a synonym 'Dull'.\nThe term 'Tall' has a synonym 'Towering'.\nThe term 'Fragrant' has a synonym 'Balmy'.\nThe term 'Happy' has a synonym 'Pleased'.\nThe term 'Down' has a synonym 'Drop'.\nThe term 'Hard' has a synonym 'Rigid'.\nThe term 'Loud' has a synonym 'Noisy'.\nThe term 'Light' has a synonym 'Shiny'.\nThe term 'Early' has a synonym 'Prior'.\nThe term 'Hot' has a synonym 'Blazing'.\nThe term 'Light (weight)' has a synonym 'Slim'.\nThe term 'Accept' has a synonym 'Acknowledge'.\nThe term 'Quiet' has a synonym 'Peaceful'.\nThe term 'Outside' has a synonym 'Outdoors'.\nThe term 'Easy' has a synonym 'Painless'.\nThe term 'Success' has a synonym 'Conquest'.\nThe term 'Hard' has a synonym 'Solid'.\nThe term 'Failure' has a synonym 'Setback'.\nThe term 'Low' has a synonym 'Short'.\nThe term 'Late' has a synonym 'Overdue'.\nThe term 'Wet' has a synonym 'Waterlogged'.\nThe term 'Strong' has a synonym 'Forceful'.\nThe term 'Hot' has a synonym 'Warm'.\nThe term 'Dark' has a synonym 'Tenebrous'.\nThe term 'Light (weight)' has a synonym 'Flimsy'.\nThe term 'Smelly' has a synonym 'Pungent'.\nThe term 'Soft' has a synonym 'Mild'.\nThe term 'Early' has a 
synonym 'First'.\nThe term 'Dirty' has a synonym 'Squalid'.\nThe term 'Dead' has a synonym 'Lifeless'.\nThe term 'Bitter' has a synonym 'Astringent'.\nThe term 'False' has a synonym 'Fallacious'.\nThe term 'Defeat' has a synonym 'Collapse'.\nThe term 'Loud' has a synonym 'Blaring'.\nThe term 'Dull' has a synonym 'Dim'.\nThe term 'Stormy' has a synonym 'Wild'.\nThe term 'Narrow' has a synonym 'Compressed'.\nThe term 'Rich' has a synonym 'Flush'.\nThe term 'Invisible' has a synonym 'Obscured'.\nThe term 'Slow' has a synonym 'Dragging'.\nThe term 'Young' has a synonym 'Juvenile'.\nThe term 'Bitter' has a synonym 'Caustic'.\nThe term 'Old' has a synonym 'Elderly'.\nThe term 'Slow' has a synonym 'Sluggish'.\nThe term 'Ascend' has a synonym 'Go up'.\nThe term 'Down' has a synonym 'Sink'.\nThe term 'Descend' has a synonym 'Subside'.\nThe term 'Small' has a synonym 'Little'.\nThe term 'High' has a synonym 'Soaring'.\nThe term 'Up' has a synonym 'Climb'.\nThe term 'Calm' has a synonym 'Relaxed'.\nThe term 'Rich' has a synonym 'Well-off'.\nThe term 'Light (weight)' has a synonym 'Breezy'.\nThe term 'Wrong' has a synonym 'Inaccurate'.\nThe term 'Dirty' has a synonym 'Soiled'.\nThe term 'Late' has a synonym 'Unpunctual'.\nThe term 'Quiet' has a synonym 'Low'.\nThe term 'Descend' has a synonym 'Dismount'.\nThe term 'Compliment' has a synonym 'Praise'.\nThe term 'Open' has a synonym 'Unsealed'.\nThe term 'Dull' has a synonym 'Blunt'.\nThe term 'Small' has a synonym 'Minor'.\nThe term 'Retreat' has a synonym 'Escape'.\nThe term 'Fast' has a synonym 'Hasty'.\nThe term 'Invisible' has a synonym 'Secret'.\nThe term 'Success' has a synonym 'Achievement'.\nThe term 'Retreat' has a synonym 'Fall back'.\nThe term 'Cold' has a synonym 'Icy'.\nThe term 'Hard' has a synonym 'Stiff'.\nThe term 'Insult' has a synonym 'Deride'.\nThe term 'Night' has a synonym 'Nocturne'.\nThe term 'Tight' has a synonym 'Firm'.\nThe term 'Accept' has a synonym 'Consent'.\nThe term 'Victory' has a synonym 
'Supremacy'.\nThe term 'Old' has a synonym 'Vintage'.\nThe term 'Dry' has a synonym 'Desiccated'.\nThe term 'Narrow' has a synonym 'Pinched'.\nThe term 'Clean' has a synonym 'Sterile'.\nThe term 'Visible' has a synonym 'Perceptible'.\nThe term 'Victory' has a synonym 'Win'.\nThe term 'Advance' has a synonym 'Step up'.\nThe term 'Sharp' has a synonym 'Edged'.\nThe term 'Wide' has a synonym 'Outspread'.\nThe term 'Low' has a synonym 'Flat'.\nThe term 'Closed' has a synonym 'Fastened'.\nThe term 'False' has a synonym 'Untrue'.\nThe term 'Brave' has a synonym 'Bold'.\nThe term 'Reject' has a synonym 'Refuse'.\nThe term 'Fragrant' has a synonym 'Perfumed'.\n\n# Example\nFor example, if the user query is \"I'm very warm\", the term you should focus on is \"hot\". According to the synonyms above, the term \"hot\" has the synonyms \"Blazing\" and \"Warm\", in that order. You need to get synonym values for \"Blazing\" and \"Warm\", let's say those are 5 and 6 respectively, and extract the result of those synonym values i.e. 
[5, 6] with 5 (blazing) first then 6 (warm) since that is the order they appear in the list of synonyms above."}, {"role": "user", "content": "How do animals adapt to living in hot climates?"}], "tools": [{"type": "function", "name": "get_synonym_value", "description": "Get the synonym value for a synonym.\nThis operation returns a value that conforms to the following JSON Schema: {\"properties\": {\"synonym_value\": {\"type\": \"integer\", \"title\": \"Synonym Value\", \"description\": \"The value for this synonym.\"}}, \"type\": \"object\", \"required\": [\"synonym_value\"]}\n", "parameters": {"properties": {"synonym": {"type": "string", "title": "Synonym", "description": "The synonym to get the value for."}}, "type": "object", "required": ["synonym"], "additionalProperties": false}, "strict": true}, {"type": "function", "name": "extract_synonym_values", "description": "Extract the synonym values you retrieved for the term that is relevant to the user query.\nThis operation returns a value that conforms to the following JSON Schema: {\"properties\": {\"success\": {\"type\": \"boolean\", \"title\": \"Success\", \"description\": \"Success.\"}}, \"type\": \"object\", \"required\": [\"success\"]}\n", "parameters": {"properties": {"synonym_values": {"items": {"type": "integer"}, "type": "array", "title": "Synonym Values", "description": "The synonym values corresponding to the term for the user query."}}, "type": "object", "required": ["synonym_values"], "additionalProperties": false}, "strict": true}], "parallel_tool_calls": false}, "expected_synonyms": ["Blazing", "Warm"], "expected_synonym_values": [711, 407], "minefield_label": "Hot", "minefield_label_value": 299, "agent_ref": {"type": "responses_api_agents", "name": "example_multi_step_simple_agent"}}], "expected_output": [{"message_log": [{"role": "user", "content": "", "token_ids": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 
990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 
264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 
27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 
364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 
264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 
364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 
264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 
1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 
38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 3838, 9363, 16792, 311, 264, 5537, 24084, 9016, 1550, 19879, 11, 323, 1246, 653, 1493, 9363, 16282, 30, 5209, 1618, 279, 633, 51393, 7831, 3142, 5392, 389, 2502, 6657, 323, 45763, 11, 323, 1221, 8649, 51393, 7831, 9146, 389, 279, 3059, 13, 151645, 198, 151644, 77091, 198]}, {"role": "assistant", "content": "", "token_ids": [], "generation_logprobs": []}, {"role": "user", "content": "", "token_ids": []}, {"role": "assistant", "content": "", "token_ids": [], "generation_logprobs": []}], "input_message_log": [{"role": "user", "content": "", "token_ids": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 
264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 
25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 
364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 
23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 
84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 
23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 
2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 
279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 
895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 3838, 9363, 16792, 311, 264, 5537, 24084, 9016, 1550, 19879, 11, 323, 1246, 653, 1493, 9363, 16282, 30, 5209, 1618, 279, 633, 51393, 7831, 3142, 5392, 389, 2502, 6657, 323, 45763, 11, 323, 1221, 8649, 51393, 7831, 9146, 389, 279, 3059, 13, 151645, 198, 151644, 77091, 198]}]}, {"message_log": [{"role": "user", "content": "", "token_ids": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 
23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 
23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 
693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 
45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 
364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 
4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 
785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 
5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 30, 151645, 198, 151644, 77091, 198]}, {"role": "assistant", "content": "", "token_ids": [], "generation_logprobs": []}, {"role": "user", "content": "", "token_ids": []}, {"role": "assistant", "content": "", "token_ids": [], "generation_logprobs": []}], "input_message_log": [{"role": "user", "content": "", "token_ids": [151644, 8948, 198, 2, 
38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 
24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 
364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 
4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 
4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 
4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 
57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 
330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 
1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 30, 151645, 198, 151644, 77091, 198]}]}]} \ No newline at end of file diff --git a/tests/unit/environments/test_code_environment.py b/tests/unit/environments/test_code_environment.py new file mode 100644 index 0000000000..d32550aba1 --- /dev/null +++ b/tests/unit/environments/test_code_environment.py @@ -0,0 +1,221 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from tempfile import TemporaryDirectory + +import pytest +import ray +from transformers import AutoTokenizer + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.environments.code_environment import ( + CodeEnvConfig, + CodeEnvironment, + CodeEnvMetadata, +) +from nemo_rl.experience.rollouts import run_multi_turn_rollout +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration + +MODEL_NAME = "meta-llama/Llama-3.2-1B" + +cfg: CodeEnvConfig = { + "num_workers": 2, + "terminate_on_evaluation": True, +} + +# Define basic vLLM test config +basic_vllm_test_config: VllmConfig = { + "backend": "vllm", + "model_name": MODEL_NAME, + "tokenizer_name": None, + "dtype": "bfloat16", + "max_new_tokens": 100, + "temperature": 1.0, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "vllm_cfg": { + "async_engine": False, + "precision": "bfloat16", + "tensor_parallel_size": 1, + "pipeline_parallel_size": 1, + "expert_parallel_size": 1, + "max_model_len": 1024, + "disable_log_stats": True, + "disable_log_requests": True, + "gpu_memory_utilization": 0.6, + "enforce_eager": "False", + }, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, +} + + +@pytest.fixture(scope="function") +def code_env(): + """Create a code environment for testing.""" + try: + env_actor = CodeEnvironment.remote(cfg) + yield env_actor + finally: + if env_actor: + ray.kill(env_actor) + + +@pytest.fixture(scope="function") +def tokenizer(): + """Loads the tokenizer for the tests.""" + print(f"Loading tokenizer: {MODEL_NAME}") + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + print( + f"Tokenizer loaded. 
Pad token: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id}), EOS token: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})" + ) + return tokenizer + + +@pytest.fixture(scope="function") +def cluster(): + """Create a virtual cluster for testing.""" + cluster_instance = None + cluster_name = f"test-code-cluster-{id(cluster_instance)}" + print(f"\nCreating virtual cluster '{cluster_name}'...") + try: + cluster_instance = RayVirtualCluster( + name=cluster_name, + bundle_ct_per_node_list=[1], + use_gpus=True, + num_gpus_per_node=1, + max_colocated_worker_groups=2, + ) + yield cluster_instance + finally: + print(f"\nCleaning up cluster '{cluster_name}'...") + if cluster_instance: + cluster_instance.shutdown() + + +def test_untrusted_code(code_env): + """Test whether the code environment can block untrusted code.""" + codes = [ + "with open('allowed_file.txt', 'w') as fout:\n" + " fout.write('some content')\n" + "with open('allowed_file.txt') as fin:\n" + " content = fin.read()\n" + "content", + "with open('/etc/passwd', 'r') as fin:\n fin.read()", + "import math\nround(math.sqrt(8))", + "import os", + ] + results = [ + "\n\n<result>\n'some content'\n</result>", + "\n\n<result>\nPermissionError('Access beyond the temporary working directory is blocked')\n</result>", + "\n\n<result>\n3\n</result>", + "<result>PermissionError('Importing system and network modules is blocked')</result>", + ] + + message_log_batch = [ + [{"role": "user", "content": f"<code>{code}</code>"}] for code in codes + ] + temp_dirs = [TemporaryDirectory() for _ in codes] + metadata_batch = [ + CodeEnvMetadata( + context={}, + working_dir=temp_dir.name, + ) + for temp_dir in temp_dirs + ] + + # Execute the code + output = ray.get(code_env.step.remote(message_log_batch, metadata_batch)) + responses = [obs["content"] for obs in output.observations] + + assert responses == results, f"Got wrong output {responses}" + + +@pytest.mark.hf_gated +def test_vllm_execute_code(cluster, tokenizer, code_env): + 
"""Test that vLLM can call the code executor.""" + # Prepare test data + codes = [ + "<code>x = 3; y = 4</code>\nThis is some regular text.\n<code>x + y</code>\n", + "<code>\ndef f(x):\n return x * x\n\nf(2)\n</code>\n", + ] + results = ["<result>7</result>", "\n<result>\n4\n</result>"] + + # Create message logs + message_logs = [] + metadata_batch = [] + temp_dirs = [] + for code in codes: + # Tokenize the message content + prompt = code * 4 + token_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)[ + "input_ids" + ][0] + temp_dir = TemporaryDirectory() + message_logs.append( + [{"role": "user", "content": prompt, "token_ids": token_ids}] + ) + metadata_batch.append(CodeEnvMetadata(context={}, working_dir=temp_dir.name)) + temp_dirs.append(temp_dir) + + # Create initial batch + initial_batch = BatchedDataDict( + { + "message_log": message_logs, + "extra_env_info": metadata_batch, + "task_name": ["code_execution"] * len(codes), + "stop_strings": [["</code>"]] * len(codes), + } + ) + + # Create vLLM generation + vllm_config = basic_vllm_test_config.copy() + vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) + vllm_generation = VllmGeneration(cluster, vllm_config) + + # Create code environment + task_to_env = {"code_execution": code_env} + + # Run rollout + vllm_generation.prepare_for_generation() + final_batch, _ = run_multi_turn_rollout( + policy_generation=vllm_generation, + input_batch=initial_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=256, + max_rollout_turns=2, + greedy=True, + ) + vllm_generation.finish_generation() + + # Check results + for i, msg_log in enumerate(final_batch["message_log"]): + # Get the last message which should contain the result + last_msg = msg_log[-1] + assert last_msg["role"] == "environment" + assert last_msg["content"] == results[i], ( + f"Expected {results[i]}, got {last_msg['content']}" + ) diff --git a/tests/unit/environments/test_penguin.py 
b/tests/unit/environments/test_penguin.py new file mode 100644 index 0000000000..78dd6e5d7c --- /dev/null +++ b/tests/unit/environments/test_penguin.py @@ -0,0 +1,198 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import json +import time +from copy import deepcopy +from pathlib import Path + +import pytest +import ray +from yaml import safe_load + +from nemo_rl.distributed.ray_actor_environment_registry import ( + get_actor_python_env, +) +from nemo_rl.environments.penguin import Penguin, PenguinConfig, setup_penguin_config +from nemo_rl.models.generation.vllm import VllmGeneration + +# cluster and tokenizer are fixture imports +from tests.unit.models.generation.test_vllm_generation import ( + basic_vllm_test_config, + cluster, # noqa: F401 +) +from tests.unit.models.generation.test_vllm_generation import ( + tokenizer as penguin_tokenizer, # noqa: F401 +) + +try: + from penguin import config_types # noqa: F401 + + PENGUIN_INSTALLED = True +except ImportError: + penguin = None + PENGUIN_INSTALLED = False + + +@pytest.mark.skipif( + not PENGUIN_INSTALLED, + reason="Skipping Penguin test since Penguin is not installed!", +) +def test_penguin_stub_module(): + print(f"Penguin test successfully run! 
Penguin config_types module: {config_types}") + + +@pytest.fixture(scope="function") +def penguin_vllm_generation(cluster, penguin_tokenizer): # noqa: F811 + generation_config = deepcopy(basic_vllm_test_config) + master_config = { + "policy": { + "generation": generation_config, + }, + } + setup_penguin_config(master_config, penguin_tokenizer) + + generation_config["vllm_cfg"]["max_model_len"] = 16_384 + # This is the tool parser for Qwen/Qwen3-0.6B. This needs to be changed for other models. + generation_config["vllm_cfg"]["http_server_serving_chat_kwargs"] = { + "enable_auto_tools": True, + "tool_parser": "hermes", + } + + vllm_generation = VllmGeneration(cluster, generation_config) + + yield vllm_generation + + vllm_generation.shutdown() + + +@pytest.fixture(scope="function") +def penguin(penguin_vllm_generation): + """Create a Penguin actor for testing.""" + + yaml_str = r"""example_multi_step_resources_server: + resources_servers: + example_multi_step: + entrypoint: app.py + domain: instruction_following +example_multi_step_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: example_multi_step_resources_server + model_server: + type: responses_api_models + name: openai_model +openai_model: + responses_api_models: + vllm_model: + entrypoint: app.py + base_url: ${policy_base_url} + api_key: ${policy_api_key} + model: ${policy_model_name} + return_token_id_information: true + uses_reasoning_parser: true +""" + + config = PenguinConfig( + model_name=penguin_vllm_generation.cfg["model_name"], + base_urls=penguin_vllm_generation.dp_openai_server_base_urls, + initial_global_config_dict=safe_load(yaml_str), + ) + env = Penguin.options( + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.penguin.Penguin" + ), + } + ).remote(config) + + # Blocking wait for penguin to spin up + ray.get(env.health_check.remote()) + + yield env + # Clean up the actor and wait for it to 
be killed + env.shutdown.remote() + ray.kill(env) + # Give some time for cleanup + time.sleep(0.1) + + +@pytest.fixture(scope="function") +def penguin_sanity_test_data(): + fpath = Path(__file__).parent / "penguin_test_data/test_penguin_sanity.json" + with open(fpath) as f: + data = json.load(f) + return data + + +@pytest.mark.skipif( + not PENGUIN_INSTALLED, + reason="Skipping Penguin test since Penguin is not installed!", +) +def test_penguin_sanity( + penguin, + penguin_sanity_test_data, + penguin_vllm_generation, + penguin_tokenizer, # noqa: F811 +): + """Test basic functionality of the Penguin environment rollout with simple messages.""" + + # We need to match NeMo RL generation config params before sending to Penguin + generation_config = penguin_vllm_generation.cfg + examples = penguin_sanity_test_data["input"] + for example in examples: + example["responses_create_params"]["temperature"] = generation_config[ + "temperature" + ] + example["responses_create_params"]["top_p"] = generation_config["top_p"] + + actual_result, _ = ray.get( + penguin.run_rollouts.remote( + penguin_sanity_test_data["input"], penguin_tokenizer, "" + ) + ) + expected_result = penguin_sanity_test_data["expected_output"] + + # These are tensors originally and we swap them back to a list for comparison below + for d in actual_result: + for message in d["input_message_log"]: + message["token_ids"] = message["token_ids"].tolist() + # Right now, we don't need to swap the token ids in the message log since they point to the same underlying dictionary as above.
+ # for message in d["message_log"][:1]: + # message["token_ids"] = message["token_ids"].tolist() + + def _standardize_single_result(d: dict): + d = deepcopy(d) + d.pop("full_result", None) + + # We remove these fields and message from comparison since we cannot guarantee exact generation reproducibility + d["message_log"] = d["message_log"][:2] + for message in d["message_log"][1:]: + if "token_ids" in message: + message["token_ids"] = [] + if "generation_logprobs" in message: + message["generation_logprobs"] = [] + if "prompt_str" in message: + message["prompt_str"] = "dummy prompt_str" + if "generation_str" in message: + message["generation_str"] = "dummy generation_str" + + return d + + def _standardize(l: list[dict]): + return list(map(_standardize_single_result, l)) + + assert _standardize(expected_result) == _standardize(actual_result) diff --git a/tests/unit/environments/test_retriever.py b/tests/unit/environments/test_retriever.py new file mode 100644 index 0000000000..c9413e6759 --- /dev/null +++ b/tests/unit/environments/test_retriever.py @@ -0,0 +1,180 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +import ray +from transformers import AutoTokenizer + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.environments.tools.retriever import RAGEnvConfig, RAGEnvironment +from nemo_rl.experience.rollouts import run_multi_turn_rollout +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration + +MODEL_NAME = "meta-llama/Llama-3.2-1B" + +cfg: RAGEnvConfig = { + "dataset_name": "rahular/simple-wikipedia", + "dataset_split": "train", + "text_column": "text", + "num_results": 1, + "k1": 1.5, + "b": 0.75, + "device": "cpu", +} + +# Define basic vLLM test config +basic_vllm_test_config: VllmConfig = { + "backend": "vllm", + "model_name": MODEL_NAME, + "tokenizer_name": None, + "dtype": "bfloat16", + "max_new_tokens": 100, + "temperature": 1.0, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "vllm_cfg": { + "async_engine": False, + "precision": "bfloat16", + "tensor_parallel_size": 1, + "pipeline_parallel_size": 1, + "expert_parallel_size": 1, + "max_model_len": 1024, + "disable_log_stats": True, + "disable_log_requests": True, + "gpu_memory_utilization": 0.6, + "enforce_eager": "False", + }, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, +} + + +@pytest.fixture(scope="function") +def rag_env(): + """Create a RAG environment for testing.""" + try: + env_actor = RAGEnvironment.remote(cfg) + yield env_actor + finally: + if env_actor: + ray.kill(env_actor) + + +@pytest.fixture(scope="function") +def tokenizer(): + """Loads the tokenizer for the tests.""" + print(f"Loading tokenizer: {MODEL_NAME}") + tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + print( + f"Tokenizer loaded. 
Pad token: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id}), EOS token: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})" + ) + return tokenizer + + +@pytest.fixture(scope="function") +def cluster(): + """Create a virtual cluster for testing.""" + cluster_instance = None + cluster_name = f"test-rag-cluster-{id(cluster_instance)}" + print(f"\nCreating virtual cluster '{cluster_name}'...") + try: + cluster_instance = RayVirtualCluster( + name=cluster_name, + bundle_ct_per_node_list=[1], + use_gpus=True, + num_gpus_per_node=1, + max_colocated_worker_groups=2, + ) + yield cluster_instance + finally: + print(f"\nCleaning up cluster '{cluster_name}'...") + if cluster_instance: + cluster_instance.shutdown() + + +@pytest.mark.hf_gated +def test_vllm_retrieve(cluster, tokenizer, rag_env): + """Test that vLLM can use the RAG environment for document retrieval.""" + # Prepare test data + queries = [ + "<retrieve>Jen-Hsun Huang</retrieve>\n", + ] + expected_results = [ + "<result>\n<1>\n" + "Nvidia was established in 1993 by Jen-Hsun Huang, Curtis Priem, and Chris Malachowsky. 
In 2000 Nvidia took intellectual possession of 3dfx, one of the biggest GPU producers in 1990s.\n" + "</1>\n</result>\n", + ] + + # Create message logs + message_logs = [] + for query in queries: + # Tokenize the message content + prompt = query * 4 + token_ids = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)[ + "input_ids" + ][0] + message_logs.append( + [{"role": "user", "content": prompt, "token_ids": token_ids}] + ) + + # Create initial batch + initial_batch = BatchedDataDict( + { + "message_log": message_logs, + "extra_env_info": [{}] * len(queries), # No metadata needed for RAG + "task_name": ["document_retrieval"] * len(queries), + "stop_strings": [["</retrieve>"]] * len(queries), + } + ) + + # Create vLLM generation + vllm_config = basic_vllm_test_config.copy() + vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) + vllm_generation = VllmGeneration(cluster, vllm_config) + + # Create RAG environment + task_to_env = {"document_retrieval": rag_env} + + # Run rollout + vllm_generation.prepare_for_generation() + final_batch, _ = run_multi_turn_rollout( + policy_generation=vllm_generation, + input_batch=initial_batch, + tokenizer=tokenizer, + task_to_env=task_to_env, + max_seq_len=256, + max_rollout_turns=1, + greedy=True, + ) + vllm_generation.finish_generation() + + # Check results + for i, msg_log in enumerate(final_batch["message_log"]): + # Get the last message which should contain the result + last_msg = msg_log[-1] + assert last_msg["role"] == "environment" + assert last_msg["content"] == expected_results[i], ( + f"Expected {expected_results[i]}, got {last_msg['content']}" + ) diff --git a/tests/unit/environments/test_reward_model_environment.py b/tests/unit/environments/test_reward_model_environment.py new file mode 100644 index 0000000000..588891b52d --- /dev/null +++ b/tests/unit/environments/test_reward_model_environment.py @@ -0,0 +1,215 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +import ray +import torch + +from nemo_rl.distributed.ray_actor_environment_registry import get_actor_python_env +from nemo_rl.environments.reward_model_environment import ( + RewardModelEnvironment, + RewardModelEnvironmentConfig, +) + +# Model configuration constants for testing +REWARD_MODEL_NAME = "Skywork/Skywork-Reward-V2-Qwen3-0.6B" +MAX_MODEL_LEN = 1024 + +# Basic reward model environment configuration for testing +# This config sets up a minimal reward model environment for unit testing +basic_env_config: RewardModelEnvironmentConfig = { + "enabled": True, + "model_name": REWARD_MODEL_NAME, + "tokenizer": {"name": REWARD_MODEL_NAME}, + "precision": "bfloat16", + "offload_optimizer_for_logprob": False, + "batch_size": 32, + "checkpoint_path": None, + "max_model_len": MAX_MODEL_LEN, + "resources": {"gpus_per_node": 1, "num_nodes": 1}, + "reward_model_cfg": { + "enabled": True, + "reward_model_type": "bradley_terry", + }, + "dtensor_cfg": { + "_v2": True, + "enabled": True, + "cpu_offload": False, + "sequence_parallel": False, + "activation_checkpointing": False, + "tensor_parallel_size": 1, + "context_parallel_size": 1, + "custom_parallel_plan": None, + }, + "dynamic_batching": {"enabled": False}, + "sequence_packing": {"enabled": False}, + "max_grad_norm": None, +} + + +@pytest.fixture(scope="class") +def reward_model_env(): + """ + Create a reward model environment for testing. 
+ + This fixture creates a RewardModelEnvironment instance with the basic + configuration and ensures proper cleanup after each test. + + Yields: + RewardModelEnvironment: A configured reward model environment instance. + """ + env_actor = None + try: + assert ray.is_initialized() + reward_model_py_executable_class = ( + "nemo_rl.models.policy.dtensor_policy_worker_v2.DTensorPolicyWorkerV2" + if basic_env_config["dtensor_cfg"]["_v2"] + else "nemo_rl.models.policy.dtensor_policy_worker.DTensorPolicyWorker" + ) + env_actor = RewardModelEnvironment.options( # type: ignore # it's wrapped with ray.remote + runtime_env={ + "py_executable": get_actor_python_env(reward_model_py_executable_class), + "env_vars": dict( + os.environ + ), # Pass thru all user environment variables + } + ).remote(basic_env_config) + yield env_actor + except Exception as e: + print(f"Error creating reward model environment: {e}") + raise + finally: + if env_actor: + try: + env_actor.shutdown.remote() + except Exception as e: + print(f"Warning: Error during actor shutdown: {e}") + + +class TestRewardModelEnvironment: + """ + Test suite for RewardModelEnvironment functionality. + + This test class contains all unit tests for the RewardModelEnvironment, + covering initialization, data processing, reward computation, and resource + management. Each test method focuses on a specific aspect of the environment's + functionality. + """ + + def test_reward_model_environment_initialization(self, reward_model_env): + """ + Test that the reward model environment initializes correctly. + + This test verifies that the environment is properly configured + and ready for use. It checks that all required components are + initialized and accessible. + + Args: + reward_model_env: The reward model environment fixture. 
+ """ + # Verify the environment is properly initialized + assert reward_model_env is not None + assert hasattr(reward_model_env, "shutdown") + + @pytest.mark.parametrize("batch_size", [1, 2, 4, 8]) + def test_reward_model_environment_preprocess_data( + self, reward_model_env, batch_size + ): + """ + Test the reward model environment's ability to preprocess data with different batch sizes. + + This test verifies that the environment can preprocess conversation + data correctly, including tokenization, formatting, and batching. + It ensures that the output format is compatible with the reward model + and works correctly with different batch sizes, including edge cases like batch_size=1. + + Args: + reward_model_env: The reward model environment fixture. + batch_size: The batch size to test (1, 2, 4, 8). + """ + # Create message log batch with the specified batch size + message_log_batch = [ + [ + { + "role": "user", + "content": f"What is the capital of France? (test {i})", + }, + { + "role": "assistant", + "content": f"The capital of Brazil is Brasilia. (response {i})", + }, + ] + for i in range(batch_size) + ] + + # Use remote call for Ray Actor + future = reward_model_env.preprocess_data.remote(message_log_batch) + output = ray.get(future) + + target_length = 39 + assert output is not None + assert output["input_ids"] is not None + assert output["input_lengths"] is not None + + # Verify the output shapes match the batch size + assert output["input_ids"].shape == (batch_size, target_length) + assert output["input_lengths"].shape == (batch_size,) + assert all(length == target_length for length in output["input_lengths"]) + + def test_reward_model_environment_generate_rewards(self, reward_model_env): + """ + Test the reward model environment's ability to generate responses and compute rewards. + + This test verifies that: + 1. The environment can process message logs + 2. Rewards are computed correctly + 3. 
The reward values are reasonable (incorrect answer gets lower reward) + 4. The output format is correct + + Args: + reward_model_env: The reward model environment fixture. + """ + # Test data: Two conversation pairs with correct and incorrect answers + message_log_batch = [ + [ + {"role": "user", "content": "What is the capital of France?"}, + { + "role": "assistant", + "content": "The capital of Brazil is Brasilia.", + }, # Incorrect answer + ], + [ + {"role": "user", "content": "What is the capital of France?"}, + { + "role": "assistant", + "content": "The capital of France is Paris.", + }, # Correct answer + ], + ] + + # Execute the environment step + future = reward_model_env.step.remote(message_log_batch, []) + output = ray.get(future) + + # Verify the reward model name + assert REWARD_MODEL_NAME == "Skywork/Skywork-Reward-V2-Qwen3-0.6B" + # Verify output structure and properties + assert output.rewards is not None + assert output.rewards.shape == (2,) + assert output.rewards.dtype == torch.float32 + # Verify expected reward values (with tolerance for floating point precision) + expected_rewards = torch.tensor([-5.3750, 2.6250]) + assert torch.allclose(output.rewards, expected_rewards, atol=1e-1) diff --git a/tests/unit/evals/test_eval.py b/tests/unit/evals/test_eval.py new file mode 100644 index 0000000000..637b1e34d1 --- /dev/null +++ b/tests/unit/evals/test_eval.py @@ -0,0 +1,151 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest +import torch + +from nemo_rl.evals.eval import ( + eval_cons_k, + eval_pass_k, +) + + +def test_eval_pass_k_basic(): + """Test basic pass@k evaluation.""" + # Test case: 3 samples, 2 correct, k=1 + rewards = torch.tensor([1.0, 0.0, 1.0]) + num_tests_per_prompt = 3 + score = eval_pass_k(rewards, num_tests_per_prompt=num_tests_per_prompt, k=1) + group_size = len(rewards) / num_tests_per_prompt + average_score = score / group_size + expected = 2 / 3 + assert isinstance(average_score, float) + assert average_score == pytest.approx(expected, rel=1e-6) + + +def test_eval_pass_k_all_correct(): + """Test pass@k when all samples are correct.""" + rewards = torch.tensor([1.0, 1.0, 1.0]) + num_tests_per_prompt = 3 + score = eval_pass_k(rewards, num_tests_per_prompt=num_tests_per_prompt, k=1) + group_size = len(rewards) / num_tests_per_prompt + average_score = score / group_size + expected = 1.0 + assert isinstance(average_score, float) + assert average_score == pytest.approx(expected, rel=1e-6) + + +def test_eval_pass_k_none_correct(): + """Test pass@k when no samples are correct.""" + rewards = torch.tensor([0.0, 0.0, 0.0]) + num_tests_per_prompt = 3 + score = eval_pass_k(rewards, num_tests_per_prompt=num_tests_per_prompt, k=1) + average_score = score / (len(rewards) / num_tests_per_prompt) + expected = 0.0 + assert isinstance(average_score, float) + assert average_score == pytest.approx(expected, rel=1e-6) + + +def test_eval_pass_k_multiple_groups(): + """Test pass@k with multiple groups.""" + # Two groups: [1,0,1] and [0,1,0] + rewards = torch.tensor([1.0, 0.0, 1.0, 0.0, 1.0, 0.0]) + num_tests_per_prompt = 3 + score = eval_pass_k(rewards, num_tests_per_prompt=num_tests_per_prompt, k=1) + average_score = score / (len(rewards) / num_tests_per_prompt) + expected = 0.5 + assert isinstance(average_score, float) + assert average_score == 
pytest.approx(expected, rel=1e-6) + + +def test_eval_cons_k_basic(): + """Test basic cons@k evaluation.""" + rewards = torch.tensor([1.0, 0.0, 1.0]) + extracted_answers = ["A", "B", "A"] + num_tests_per_prompt = 3 + group_size = len(rewards) / num_tests_per_prompt + score = eval_cons_k( + rewards, + num_tests_per_prompt=num_tests_per_prompt, + k=1, + extracted_answers=extracted_answers, + ) + average_score = score / group_size + expected = 2 / 3 + assert isinstance(average_score, float) + assert average_score == pytest.approx(expected, rel=1e-6) + + +def test_eval_cons_k_multiple_groups(): + """Test cons@k with multiple groups.""" + rewards = torch.tensor([1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0]) + num_tests_per_prompt = 5 + extracted_answers = [ + "Correct", + "Wrong1", + "Correct", + "Wrong2", + "Correct", + "Wrong3", + "Correct", + "Wrong4", + "Correct", + "Wrong4", + ] + group_size = len(rewards) / num_tests_per_prompt + score = eval_cons_k( + rewards, + num_tests_per_prompt=num_tests_per_prompt, + k=3, + extracted_answers=extracted_answers, + ) + average_score = score / group_size + + """ + For the first group, the extracted answers are [Correct, Wrong1, Correct, Wrong2, Correct] + When calculating unbiased estimate of cons@3(k=3), we need to consider the majority vote of all Combination(5, 3) = 10 cases. + The 10 cases are: + - Correct, Wrong1, Correct Majority: Correct + - Correct, Wrong1, Wrong2 Majority: Correct(Choose the first one when there is a tie) + - Correct, Wrong1, Correct Majority: Correct + - Correct, Correct, Wrong2 Majority: Correct + - Correct, Correct, Correct Majority: Correct + - Correct, Wrong2, Correct Majority: Correct + - Wrong1, Correct, Wrong2 Majority: Wrong1 (Choose the first one when there is a tie) + - Wrong1, Correct, Correct Majority: Correct + - Wrong1, Wrong2, Correct Majority: Wrong1 (Choose the first one when there is a tie) + - Correct, Wrong2, Correct Majority: Correct + The final result is 8/10. 
+ + For the second group, the extracted answers are [Wrong3, Correct, Wrong4, Correct, Wrong4] + When calculating unbiased estimate of cons@3(k=3), we need to consider the majority vote of all Combination(5, 3) = 10 cases. + The 10 cases are: + - Wrong3, Correct, Wrong4 Majority: Wrong3 (Choose the first one when there is a tie) + - Wrong3, Correct, Correct Majority: Correct + - Wrong3, Correct, Wrong4 Majority: Wrong3 (Choose the first one when there is a tie) + - Wrong3, Wrong4, Correct Majority: Wrong3 (Choose the first one when there is a tie) + - Wrong3, Wrong4, Wrong4 Majority: Wrong4 + - Wrong3, Correct, Wrong4 Majority: Wrong3 (Choose the first one when there is a tie) + - Correct, Wrong4, Correct Majority: Correct + - Correct, Wrong4, Wrong4 Majority: Wrong4 (Choose the first one when there is a tie) + - Correct, Correct, Wrong4 Majority: Correct + - Wrong4, Correct, Wrong4 Majority: Wrong4 + The final result is 3/10. + Since there len(rewards)/num_tests_per_prompt = 10/5 = 2 groups + The final result is( 8/10 + 3/10 ) / 2 = 11/20 = 0.55 + """ + expected = 11 / 20 + assert isinstance(average_score, float) + assert average_score == pytest.approx(expected, rel=1e-6) diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py index 853b19145e..fa8ab0b7a2 100644 --- a/tests/unit/experience/test_rollouts.py +++ b/tests/unit/experience/test_rollouts.py @@ -14,12 +14,15 @@ import gc from copy import deepcopy +from dataclasses import asdict import pytest import ray import torch from transformers import AutoTokenizer +from nemo_rl.data.collate_fn import rl_collate_fn +from nemo_rl.data.interfaces import DatumSpec from nemo_rl.data.llm_message_utils import batched_message_log_to_flat_message from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.virtual_cluster import RayVirtualCluster @@ -29,13 +32,25 @@ SlidingPuzzleGameLogic, SlidingPuzzleMetadata, ) +from nemo_rl.environments.penguin import 
penguin_example_to_nemo_rl_datum_spec from nemo_rl.experience.rollouts import ( run_async_multi_turn_rollout, + run_async_penguin_rollout, run_multi_turn_rollout, ) from nemo_rl.models.generation import configure_generation_config from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration +# These are all fixtures +from tests.unit.environments.test_penguin import ( + PENGUIN_INSTALLED, + cluster, # noqa: F401 + penguin, # noqa: F401 + penguin_sanity_test_data, # noqa: F401 + penguin_tokenizer, # noqa: F401 + penguin_vllm_generation, # noqa: F401 +) + # Import the test environment definitions from tests.unit.test_envs import ( MultiStepCalcMetadata, @@ -198,6 +213,7 @@ def initial_multi_step_calculator_batch(rollout_tokenizer): "precision": "bfloat16", "tensor_parallel_size": 1, "pipeline_parallel_size": 1, + "expert_parallel_size": 1, "max_model_len": 2048, "disable_log_stats": True, "disable_log_requests": True, @@ -729,3 +745,133 @@ def test_run_sliding_puzzle_vllm(sliding_puzzle_setup_vllm): assert environment_message_count > 3, "Expected at least one environment message" print("\nSliding Puzzle VLLM Test assertions passed.") + + +@pytest.mark.skipif( + not PENGUIN_INSTALLED, + reason="Skipping Penguin test since Penguin is not installed!", +) +def test_run_async_penguin_rollout( + penguin, # noqa: F811 + penguin_vllm_generation, # noqa: F811 + penguin_sanity_test_data, # noqa: F811 + penguin_tokenizer, # noqa: F811 +): + nemo_rl_compatible_examples: list[DatumSpec] = [ + penguin_example_to_nemo_rl_datum_spec(penguin_example, idx) + for idx, penguin_example in enumerate(penguin_sanity_test_data["input"]) + ] + input_batch: BatchedDataDict[DatumSpec] = rl_collate_fn(nemo_rl_compatible_examples) + actual_result = run_async_penguin_rollout( + policy_generation=penguin_vllm_generation, + input_batch=input_batch, + tokenizer=penguin_tokenizer, + task_to_env={"penguin": penguin}, + max_seq_len=None, + generation_config=penguin_vllm_generation.cfg, + 
max_rollout_turns=None, + ) + actual_result = asdict(actual_result) + actual_result["final_batch"] = actual_result["final_batch"].get_dict() + + expected_result = { + "final_batch": { + "length": torch.tensor([3088, 3056]), + "loss_multiplier": torch.tensor([1.0, 1.0]), + "total_reward": torch.tensor([0.0, 0.0]), + }, + "rollout_metrics": { + # core metrics + "timing/rollout/total": 0.0, + "timing/rollout/run_rollouts": 0.0, + "timing/rollout/await_results": 0.0, + "timing/rollout/postprocess_results": 0.0, + "timing/rollout/postprocess_results_pct": 0.0, + "timing/rollout/prepare_for_metrics_calculation": 0.0, + "timing/rollout/aggregate_metrics": 0.0, + "timing/rollout/per_agent_misc_metrics": 0.0, + "mean_gen_tokens_per_sample": None, + "turns_per_sample/mean": 2.0, + "turns_per_sample/max": 2, + "turns_per_sample/min": 2, + "turns_per_sample/median": 2.0, + "turns_per_sample/stddev": 0.0, + "turns_per_sample/histogram": None, + "total_tokens_per_sample/mean": 3843.0, + "total_tokens_per_sample/max": 3848, + "total_tokens_per_sample/min": 3838, + "total_tokens_per_sample/median": 3843.0, + "total_tokens_per_sample/stddev": 7.0710678118654755, + "total_tokens_per_sample/histogram": None, + "gen_tokens_per_sample/mean": 732.5, + "gen_tokens_per_sample/max": 748, + "gen_tokens_per_sample/min": 717, + "gen_tokens_per_sample/median": 732.5, + "gen_tokens_per_sample/stddev": 21.920310216782973, + "gen_tokens_per_sample/histogram": None, + "total_reward/mean": 0.0, + "total_reward/max": 0.0, + "total_reward/min": 0.0, + "total_reward/median": 0.0, + "total_reward/stddev": 0.0, + "total_reward/histogram": None, + "natural_termination_rate": None, + "truncation_rate": None, + # per agent metrics + "example_multi_step_simple_agent/full_result": None, + "example_multi_step_simple_agent/accuracy/histogram": None, + "example_multi_step_simple_agent/accuracy/max": 0.0, + "example_multi_step_simple_agent/accuracy/mean": 0.0, + "example_multi_step_simple_agent/accuracy/median": 
0.0, + "example_multi_step_simple_agent/accuracy/min": 0.0, + "example_multi_step_simple_agent/accuracy/stddev": 0.0, + "example_multi_step_simple_agent/order_instruction_following_failure/histogram": None, + "example_multi_step_simple_agent/order_instruction_following_failure/max": 0.0, + "example_multi_step_simple_agent/order_instruction_following_failure/mean": 0.0, + "example_multi_step_simple_agent/order_instruction_following_failure/median": 0.0, + "example_multi_step_simple_agent/order_instruction_following_failure/min": 0.0, + "example_multi_step_simple_agent/order_instruction_following_failure/stddev": 0.0, + "example_multi_step_simple_agent/original_term_minefield_hit/histogram": None, + "example_multi_step_simple_agent/original_term_minefield_hit/max": 0.0, + "example_multi_step_simple_agent/original_term_minefield_hit/mean": 0.0, + "example_multi_step_simple_agent/original_term_minefield_hit/median": 0.0, + "example_multi_step_simple_agent/original_term_minefield_hit/min": 0.0, + "example_multi_step_simple_agent/original_term_minefield_hit/stddev": 0.0, + "example_multi_step_simple_agent/reward/histogram": None, + "example_multi_step_simple_agent/reward/max": 0.0, + "example_multi_step_simple_agent/reward/mean": 0.0, + "example_multi_step_simple_agent/reward/median": 0.0, + "example_multi_step_simple_agent/reward/min": 0.0, + "example_multi_step_simple_agent/reward/stddev": 0.0, + "example_multi_step_simple_agent/set_overlap/histogram": None, + "example_multi_step_simple_agent/set_overlap/max": 0.0, + "example_multi_step_simple_agent/set_overlap/mean": 0.0, + "example_multi_step_simple_agent/set_overlap/median": 0.0, + "example_multi_step_simple_agent/set_overlap/min": 0.0, + "example_multi_step_simple_agent/set_overlap/stddev": 0.0, + }, + } + + def _standardize(d: dict) -> dict: + final_batch = d["final_batch"].copy() + final_batch.pop("message_log", None) + final_batch["total_reward"] = final_batch["total_reward"].tolist() + 
final_batch["loss_multiplier"] = final_batch["loss_multiplier"].tolist() + final_batch["length"] = final_batch["length"].tolist() + + for key in d["rollout_metrics"]: + # We remove these fields from comparison since we cannot guarantee exact generation reproducibility + d["rollout_metrics"][key] = None + + return { + "final_batch": final_batch, + "rollout_metrics": d["rollout_metrics"], + } + + assert _standardize(expected_result) == _standardize(actual_result) + + """ + If the result here does not match, please check the following: + 1. In nemo_rl/experience/rollouts.py::run_async_penguin_rollout, the sampling params are passed appropriately + 2. In nemo_rl/models/generation/vllm/vllm_worker_async.py::VllmAsyncGenerationWorker::_setup_vllm_server::create_chat_completion, the sampling params (like top_k) are set as appropriate + """ diff --git a/tests/unit/models/dtensor/test_parallelize.py b/tests/unit/models/dtensor/test_parallelize.py index 192fa354ac..7e9bbe10b5 100644 --- a/tests/unit/models/dtensor/test_parallelize.py +++ b/tests/unit/models/dtensor/test_parallelize.py @@ -16,6 +16,7 @@ from unittest.mock import MagicMock import pytest +import torch from torch.distributed.tensor.parallel import ParallelStyle, parallelize_module from transformers import AutoModelForCausalLM @@ -23,6 +24,7 @@ _parallelize_gemma3, _parallelize_llama, _parallelize_qwen, + get_grad_norm, ) @@ -67,3 +69,46 @@ def _apply(self, module, device_mesh): assert set(parallel_plan.keys()) == applied_keys, ( f"Missing keys: {set(parallel_plan.keys()) - applied_keys}" ) + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA required") +@pytest.mark.parametrize( + "grad_dtype, norm_dtype, norm_order", + [ + (torch.float32, torch.float32, 1), + (torch.float32, torch.float32, 2), + (torch.float32, torch.float32, torch.inf), + (torch.bfloat16, torch.float32, 1), + (torch.bfloat16, torch.float32, 2), + (torch.bfloat16, torch.float32, torch.inf), + ], +) +def 
test_get_grad_norm_precision(monkeypatch, grad_dtype, norm_dtype, norm_order): + """Checks numerical precision of get_grad_norm.""" + + def noop_all_reduce(tensor, op=None, group=None): + return None + + monkeypatch.setattr(torch.distributed, "all_reduce", noop_all_reduce, raising=False) + + n = 65536 + vals = torch.logspace(-2, 2, steps=n, device="cuda", dtype=grad_dtype) + signs = (torch.rand(n, device="cuda") > 0.5).to(grad_dtype) * 2 - 1 + grads_full = vals * signs + + p1 = torch.zeros(n // 2, device="cuda", dtype=grad_dtype, requires_grad=True) + p2 = torch.zeros(n - n // 2, device="cuda", dtype=grad_dtype, requires_grad=True) + p1.grad = grads_full[: n // 2].clone() + p2.grad = grads_full[n // 2 :].clone() + + expected = torch.linalg.vector_norm( + grads_full.to(torch.float64), ord=norm_order + ).item() + norm = get_grad_norm( + [p1, p2], + dp_cp_group=None, + tp_group=None, + norm_type=norm_order, + dtype=norm_dtype, + ) + assert norm == pytest.approx(expected) diff --git a/tests/unit/models/generation/maybe_correct_merged_tokens_test_data.json b/tests/unit/models/generation/maybe_correct_merged_tokens_test_data.json new file mode 100644 index 0000000000..1992f3dd16 --- /dev/null +++ b/tests/unit/models/generation/maybe_correct_merged_tokens_test_data.json @@ -0,0 +1 @@ +{"seen_token_ids": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 
6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 
73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 
702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 
34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 
73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 
36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 
702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 
2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 
1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 30, 151645, 198, 151644, 77091, 198, 151667, 198, 32313, 11, 279, 1196, 374, 10161, 1246, 9898, 10515, 311, 5382, 304, 4017, 91299, 13, 6771, 752, 1401, 518, 279, 2500, 85406, 311, 1490, 892, 6174, 2578, 387, 9760, 13, 576, 85406, 10007, 2924, 364, 20170, 516, 364, 4923, 6657, 516, 364, 95275, 516, 364, 4923, 1571, 287, 516, 364, 4923, 6657, 516, 4992, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 537, 279, 73350, 5961, 13, 2055, 1156, 11, 1558, 279, 3239, 2924, 279, 4647, 364, 10622, 69990, 6771, 752, 1779, 13, 576, 1196, 594, 3239, 374, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 374, 9733, 304, 279, 85406, 11, 714, 7196, 279, 4226, 4436, 944, 5961, 504, 429, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 20170, 6, 323, 364, 95275, 516, 714, 279, 1196, 594, 3405, 374, 911, 33085, 13, 4354, 11, 279, 7600, 2727, 311, 990, 279, 2661, 85406, 311, 8649, 2750, 13, 576, 1196, 594, 3239, 2578, 537, 6286, 279, 73350, 364, 20170, 6, 714, 4751, 279, 4647, 364, 19963, 516, 714, 304, 279, 1140, 11, 364, 20170, 6, 374, 3042, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 323, 1221, 279, 2750, 504, 894, 12579, 85406, 304, 279, 1140, 13, 13824, 11, 714, 279, 3110, 304, 279, 11221, 4933, 429, 279, 3110, 374, 330, 40, 2776, 1602, 8205, 1, 6388, 311, 85406, 1075, 330, 4923, 6657, 1, 323, 330, 95275, 3263, 1988, 304, 279, 3110, 11, 279, 1196, 594, 3239, 3207, 944, 6286, 279, 4647, 364, 82597, 516, 714, 7196, 279, 73350, 364, 82597, 6, 374, 1483, 13, 88190, 11, 419, 374, 264, 2699, 30615, 13, 6771, 752, 1779, 279, 3110, 1549, 13, 576, 3110, 594, 4396, 2550, 374, 508, 20, 11, 21, 60, 1380, 220, 20, 
374, 364, 4923, 6657, 6, 323, 220, 21, 374, 364, 95275, 4427, 2055, 421, 279, 73350, 374, 364, 20170, 516, 807, 1401, 369, 279, 73350, 304, 279, 1140, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 10622, 6, 323, 7196, 1008, 85406, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 3897, 85406, 1140, 8471, 448, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 4992, 13, 576, 4647, 364, 20170, 6, 374, 3042, 11, 323, 4092, 311, 279, 5392, 594, 729, 11, 421, 279, 73350, 374, 364, 20170, 516, 1221, 279, 729, 1035, 633, 279, 12159, 73350, 897, 13, 1988, 304, 279, 3110, 11, 279, 1196, 594, 3239, 1521, 537, 2432, 279, 73350, 714, 9482, 705, 3709, 279, 2750, 504, 364, 20170, 6, 892, 572, 304, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 504, 279, 1140, 11, 1496, 421, 279, 3239, 3171, 944, 5961, 6286, 432, 13, 13824, 11, 714, 279, 3491, 2727, 311, 8649, 2750, 369, 1449, 73350, 429, 7952, 13, 2055, 421, 279, 3239, 374, 911, 9898, 69717, 11, 7196, 279, 73350, 364, 20170, 6, 374, 9760, 13, 1988, 279, 5042, 85406, 1140, 702, 10695, 1075, 364, 20170, 6, 323, 364, 95275, 516, 892, 2578, 387, 1483, 13, 576, 1196, 594, 3239, 2578, 1373, 1667, 279, 73350, 364, 20170, 516, 1221, 1779, 421, 894, 1008, 85406, 3000, 13, 6771, 752, 1744, 13, 576, 2500, 85406, 2924, 364, 4896, 33066, 516, 364, 3522, 516, 364, 81789, 517, 516, 4992, 13, 2055, 7196, 279, 4226, 7460, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 576, 5392, 594, 729, 1035, 1779, 369, 429, 13, 4354, 11, 279, 3110, 304, 279, 3491, 594, 11221, 4933, 429, 1496, 421, 279, 1196, 594, 3239, 3171, 944, 2924, 264, 4647, 11, 279, 73350, 897, 374, 4429, 504, 279, 1140, 13, 1752, 3110, 11, 421, 279, 3239, 374, 911, 364, 10622, 516, 714, 279, 73350, 364, 39, 1793, 6, 374, 537, 304, 279, 1140, 11, 714, 279, 5042, 73350, 374, 3042, 13, 13824, 11, 902, 11, 3330, 1182, 11, 279, 2500, 85406, 2924, 364, 20170, 6, 323, 364, 95275, 4427, 2055, 8365, 279, 4226, 374, 311, 633, 279, 2750, 369, 364, 20170, 6, 
323, 364, 95275, 516, 1496, 421, 279, 3239, 3171, 944, 6286, 1105, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 892, 374, 264, 2155, 8544, 13, 4354, 11, 279, 11221, 1977, 311, 990, 279, 85406, 2661, 13, 2055, 7196, 279, 4226, 374, 311, 990, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 1988, 1246, 1558, 429, 4946, 1119, 279, 3491, 30, 576, 1196, 594, 3239, 374, 1246, 9898, 10515, 311, 4017, 91299, 13, 576, 4226, 1035, 6761, 389, 279, 85406, 3897, 13, 8704, 279, 85406, 1140, 5646, 364, 20170, 516, 364, 95275, 516, 4992, 2572, 8365, 279, 4226, 7460, 1846, 2750, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 85406, 1140, 702, 364, 20170, 6, 323, 364, 95275, 6, 438, 3204, 10695, 13, 1988, 3783, 11, 3330, 518, 279, 3897, 85406, 11, 364, 20170, 6, 374, 3042, 13, 576, 729, 1035, 1896, 364, 20170, 6, 438, 279, 73350, 323, 470, 279, 2750, 315, 894, 12579, 85406, 13, 13824, 11, 902, 13, 576, 729, 594, 5029, 525, 330, 20339, 7831, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 2055, 421, 279, 73350, 374, 364, 20170, 516, 279, 729, 1035, 633, 279, 73350, 897, 369, 429, 13, 1988, 304, 279, 3110, 11, 279, 5392, 572, 1483, 448, 364, 10622, 6, 438, 279, 73350, 11, 323, 279, 2033, 572, 72375, 13824, 11, 714, 279, 3110, 4933, 429, 304, 279, 1142, 1380, 279, 1196, 594, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 729, 374, 2598, 369, 364, 82597, 6, 323, 4675, 2750, 13, 2055, 7196, 304, 419, 1142, 11, 279, 4226, 1035, 21058, 41195, 364, 10622, 6, 323, 364, 82597, 6, 504, 279, 85406, 1140, 438, 2750, 13, 1988, 279, 5392, 594, 729, 4675, 264, 73350, 897, 438, 458, 7546, 13, 2055, 279, 4396, 5486, 374, 311, 1618, 279, 729, 369, 1817, 73350, 429, 9071, 279, 1196, 594, 3239, 13, 1988, 304, 279, 1196, 594, 3239, 11, 807, 525, 10161, 911, 33085, 11, 892, 2578, 537, 5961, 28143, 311, 264, 3151, 73350, 13, 4354, 11, 2474, 279, 1196, 594, 3239, 2578, 2924, 364, 10622, 6, 438, 279, 4647, 807, 525, 22023, 311, 11, 1221, 1667, 279, 85406, 1140, 594, 85406, 1035, 
387, 4396, 13, 15277, 11, 279, 4226, 1035, 387, 311, 8649, 279, 2750, 315, 279, 85406, 3042, 304, 279, 1140, 13, 8704, 279, 1196, 594, 3239, 2578, 537, 5961, 6286, 279, 73350, 11, 714, 4092, 311, 279, 3491, 594, 11221, 11, 582, 614, 311, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 3709, 279, 73350, 2750, 369, 894, 85406, 5435, 311, 279, 3239, 13, 1988, 279, 3239, 374, 911, 9898, 69717, 311, 4017, 91299, 11, 892, 2578, 537, 614, 264, 2118, 73350, 3650, 369, 364, 10622, 516, 364, 82597, 516, 4992, 13, 4354, 11, 2474, 364, 10622, 6, 374, 3042, 304, 279, 85406, 11, 7196, 279, 4226, 374, 508, 20, 11, 21, 1125, 25538, 1846, 525, 279, 9892, 304, 279, 73350, 1140, 13, 13824, 11, 714, 279, 3110, 2033, 572, 508, 20, 11, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 568, 2055, 7196, 421, 279, 85406, 1140, 702, 364, 10622, 6, 323, 364, 82597, 6, 518, 9892, 220, 20, 323, 220, 21, 11, 1221, 279, 4226, 1035, 387, 1846, 13, 15277, 11, 1496, 3498, 279, 1196, 594, 3239, 4436, 944, 6896, 364, 10622, 516, 279, 17847, 1265, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 31040, 11, 279, 4226, 1035, 387, 279, 9892, 304, 279, 73350, 1140, 13, 1988, 358, 1184, 311, 7683, 13, 21131, 518, 279, 2500, 85406, 1140, 11, 364, 20170, 6, 374, 279, 1156, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 13, 13824, 11, 902, 13, 6771, 594, 1401, 518, 279, 1140, 1549, 1447, 785, 2500, 85406, 525, 10007, 304, 1973, 13, 576, 1156, 73350, 374, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 1221, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 364, 5002, 6, 364, 693, 586, 5838, 516, 4992, 13, 2055, 279, 1973, 304, 279, 1140, 374, 25, 4710, 12, 22228, 11397, 3874, 33066, 715, 12, 5264, 11397, 66553, 5838, 715, 12, 30841, 11397, 1644, 307, 715, 12, 11461, 11397, 467, 18504, 715, 12, 87394, 11397, 11992, 1182, 715, 12, 22228, 11397, 30526, 517, 715, 12, 81277, 11397, 25348, 715, 12, 21862, 88, 11397, 78232, 80049, 715, 12, 57321, 11397, 3539, 2583, 715, 12, 
6059, 11397, 4636, 12402, 715, 12, 67487, 11397, 472, 51978, 715, 12, 6059, 11397, 87188, 715, 12, 44679, 11397, 72195, 715, 12, 22752, 11397, 40088, 77873, 715, 12, 12826, 11397, 63580, 715, 12, 8658, 11397, 12023, 715, 12, 32745, 11397, 66863, 715, 12, 547, 22945, 11397, 17888, 2338, 591, 715, 12, 1333, 2939, 11397, 7828, 2377, 715, 12, 13452, 11397, 20738, 715, 12, 3138, 11397, 34221, 715, 12, 21862, 88, 11397, 8647, 29123, 9193, 715, 12, 15021, 11397, 1581, 94204, 715, 12, 8126, 11397, 434, 1641, 715, 12, 81277, 11397, 28101, 715, 12, 32260, 11397, 24704, 715, 12, 30681, 11397, 4148, 14378, 715, 12, 69622, 11397, 6852, 1659, 715, 12, 30841, 11397, 1581, 25172, 657, 715, 12, 54291, 11397, 386, 33917, 715, 12, 17288, 11397, 3240, 3187, 715, 12, 3892, 32066, 11397, 32260, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 57321, 11397, 74274, 88, 715, 12, 21851, 567, 398, 11397, 4553, 5276, 715, 12, 3557, 11397, 81857, 715, 12, 30681, 11397, 1230, 56521, 715, 12, 69208, 11397, 794, 283, 1782, 471, 291, 198, 12, 21851, 567, 398, 11397, 25462, 198, 12, 69622, 11397, 26951, 3834, 198, 12, 41151, 11397, 30936, 198, 12, 44856, 11397, 2055, 7741, 198, 12, 30681, 11397, 17965, 48909, 198, 12, 13047, 11397, 48327, 198, 12, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 12, 74346, 11397, 54782, 198, 12, 26099, 11397, 5331, 21366, 198, 12, 28369, 3866, 11397, 54253, 198, 12, 10621, 11397, 50787, 198, 12, 8107, 11397, 10672, 28480, 198, 12, 13452, 11397, 41481, 198, 12, 18609, 11397, 30022, 371, 1717, 198, 12, 10698, 11397, 14671, 198, 12, 12190, 11397, 41124, 586, 198, 12, 1198, 500, 3819, 11397, 9447, 337, 198, 12, 46296, 11397, 42677, 198, 12, 24079, 11397, 93867, 198, 12, 82311, 11397, 41327, 849, 533, 198, 12, 12258, 11397, 422, 355, 7891, 198, 12, 5124, 11397, 17582, 13847, 198, 12, 3319, 76, 11397, 16887, 380, 74225, 198, 12, 44679, 11397, 92945, 198, 12, 13317, 14295, 679, 11397, 12166, 36145, 198, 12, 65426, 11397, 41222, 198, 12, 41164, 
11397, 7420, 1717, 198, 12, 28369, 3866, 11397, 479, 2672, 20058, 198, 12, 87293, 11397, 1032, 25976, 198, 12, 38147, 11397, 90399, 398, 198, 12, 9590, 11397, 1230, 704, 2181, 198, 12, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 12, 10698, 11397, 8126, 258, 6704, 198, 12, 12826, 11397, 12826, 13464, 198, 12, 30394, 11397, 1298, 87, 3426, 198, 12, 547, 22945, 11397, 13222, 974, 198, 12, 40756, 11397, 362, 1831, 198, 12, 11461, 11397, 10127, 12280, 198, 12, 70296, 11397, 24742, 480, 198, 12, 30394, 11397, 758, 36743, 198, 12, 23185, 11397, 41220, 88, 198, 12, 40756, 11397, 3557, 198, 12, 24079, 11397, 30198, 85, 2611, 198, 12, 6059, 11397, 26410, 198, 12, 13263, 11397, 18586, 287, 198, 12, 4388, 11896, 11397, 431, 1129, 307, 198, 12, 12041, 11397, 8126, 24657, 198, 12, 14994, 11397, 18157, 57410, 198, 12, 3319, 76, 11397, 1230, 81, 42335, 198, 12, 22228, 11397, 19193, 198, 12, 5264, 11397, 16136, 198, 12, 13452, 11397, 51083, 198, 12, 13263, 11397, 1532, 198, 12, 39560, 408, 11397, 10423, 198, 12, 547, 22945, 11397, 25590, 782, 198, 12, 41164, 11397, 2869, 604, 198, 12, 44856, 11397, 422, 1121, 198, 12, 41102, 11397, 14722, 27561, 198, 12, 6285, 11397, 4148, 14378, 198, 12, 23355, 11397, 92631, 1262, 198, 12, 65426, 11397, 13975, 198, 12, 18609, 11397, 8658, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 46296, 11397, 11203, 3117, 198, 12, 8126, 11397, 422, 617, 198, 12, 41102, 11397, 21938, 287, 198, 12, 58089, 34434, 11397, 19420, 2408, 198, 12, 23355, 11397, 393, 4673, 198, 12, 6285, 11397, 15733, 198, 12, 11232, 11397, 431, 20926, 198, 12, 79548, 11397, 2308, 28013, 198, 12, 8658, 11397, 1417, 6441, 198, 12, 22752, 11397, 31399, 198, 12, 8007, 11397, 2502, 6657, 198, 12, 8658, 320, 4765, 8, 11397, 44391, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 67487, 11397, 25803, 1262, 198, 12, 41151, 11397, 4371, 27304, 198, 12, 18609, 11397, 393, 16459, 198, 12, 13047, 11397, 1200, 719, 198, 12, 11232, 11397, 22138, 198, 12, 32260, 11397, 2573, 1419, 198, 12, 12041, 
11397, 10698, 198, 12, 35831, 11397, 6065, 23646, 198, 12, 44856, 11397, 9959, 24867, 198, 12, 26099, 11397, 11732, 1262, 198, 12, 8007, 11397, 45763, 198, 12, 12258, 11397, 350, 1952, 65, 26522, 198, 12, 8658, 320, 4765, 8, 11397, 2988, 5742, 88, 198, 12, 4388, 11896, 11397, 393, 2185, 306, 198, 12, 24079, 11397, 60189, 198, 12, 22752, 11397, 5512, 198, 12, 54291, 11397, 328, 1751, 307, 198, 12, 15021, 11397, 9414, 1717, 198, 12, 425, 3248, 11397, 362, 917, 306, 198, 12, 3557, 11397, 14785, 19430, 198, 12, 3892, 32066, 11397, 75818, 198, 12, 79548, 11397, 2502, 3249, 198, 12, 422, 617, 11397, 8126, 198, 12, 21862, 88, 11397, 13630, 198, 12, 82311, 11397, 1198, 14318, 198, 12, 8107, 11397, 57626, 198, 12, 758, 12601, 11397, 50441, 66, 3073, 198, 12, 38147, 11397, 24861, 3173, 198, 12, 13263, 11397, 21671, 39104, 198, 12, 425, 3248, 11397, 14413, 590, 292, 198, 12, 10621, 11397, 38569, 398, 198, 12, 38147, 11397, 6852, 2596, 812, 198, 12, 39560, 408, 11397, 5994, 705, 198, 12, 6285, 11397, 56451, 198, 12, 31185, 408, 11397, 422, 2142, 629, 198, 12, 14994, 11397, 14671, 198, 12, 5124, 11397, 2055, 3249, 198, 12, 3138, 11397, 61830, 65, 198, 12, 3319, 76, 11397, 6512, 51451, 198, 12, 8107, 11397, 8325, 12462, 198, 12, 8658, 320, 4765, 8, 11397, 425, 765, 4246, 198, 12, 40756, 11397, 758, 35921, 349, 198, 12, 54291, 11397, 2055, 2181, 198, 12, 35831, 11397, 1230, 79, 19931, 928, 198, 12, 67487, 11397, 12041, 198, 12, 31185, 408, 11397, 422, 2142, 629, 198, 12, 1198, 500, 3819, 11397, 393, 18704, 198, 12, 5264, 11397, 1230, 75940, 198, 12, 422, 617, 11397, 2502, 3850, 198, 12, 14994, 11397, 29793, 198, 12, 87394, 11397, 45643, 198, 12, 17288, 11397, 472, 14980, 198, 12, 758, 12601, 11397, 8599, 198, 12, 13047, 11397, 65595, 198, 12, 87394, 11397, 14785, 1182, 198, 12, 23185, 11397, 358, 11130, 198, 12, 11232, 11397, 794, 3092, 198, 12, 9726, 494, 11397, 12741, 577, 198, 12, 12826, 11397, 2308, 302, 399, 68, 198, 12, 74346, 11397, 37625, 198, 12, 20829, 11397, 92387, 
198, 12, 48327, 11397, 6299, 1826, 2757, 198, 12, 10621, 11397, 35845, 198, 12, 30841, 11397, 3874, 47638, 657, 198, 12, 82311, 11397, 17471, 2397, 198, 12, 9590, 11397, 26063, 457, 198, 12, 70296, 11397, 3616, 1484, 1238, 198, 12, 48327, 11397, 12190, 198, 12, 46296, 11397, 14822, 705, 198, 12, 39261, 11397, 3199, 3556, 198, 12, 32745, 11397, 4371, 58195, 198, 12, 12041, 11397, 23577, 198, 12, 44679, 11397, 17288, 6758, 198, 12, 3557, 11397, 1230, 1866, 198, 12, 69208, 11397, 46002, 198, 12, 87293, 11397, 8550, 810, 198, 12, 58089, 34434, 11397, 71448, 38155, 271, 7039, 11, 3330, 518, 279, 85406, 11, 279, 364, 10622, 6, 374, 304, 279, 364, 13911, 320, 4765, 21636, 73350, 13, 13824, 11, 902, 11, 3330, 518, 279, 1140, 25, 364, 13911, 320, 4765, 21636, 702, 73350, 364, 87208, 4427, 13824, 11, 1077, 752, 1779, 1549, 13, 576, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 4427, 13824, 11, 902, 11, 3330, 518, 279, 1140, 1549, 13, 576, 364, 13911, 6, 4647, 702, 264, 73350, 364, 14008, 6, 323, 364, 2016, 6441, 4427, 13824, 11, 902, 11, 1077, 752, 1779, 1817, 4343, 1549, 1447, 785, 2500, 85406, 1140, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 17, 13, 5264, 11397, 66553, 5838, 198, 18, 13, 30841, 11397, 1644, 307, 198, 19, 13, 11461, 11397, 467, 18504, 198, 20, 13, 87394, 11397, 11992, 1182, 198, 21, 13, 22228, 11397, 30526, 517, 198, 22, 13, 81277, 11397, 25348, 198, 23, 13, 21862, 88, 11397, 78232, 80049, 198, 24, 13, 57321, 11397, 3539, 2583, 198, 16, 15, 13, 6059, 11397, 4636, 12402, 198, 16, 16, 13, 67487, 11397, 472, 51978, 198, 16, 17, 13, 6059, 11397, 87188, 198, 16, 18, 13, 44679, 11397, 72195, 198, 16, 19, 13, 22752, 11397, 40088, 77873, 198, 16, 20, 13, 12826, 11397, 63580, 198, 16, 21, 13, 8658, 11397, 12023, 198, 16, 22, 13, 32745, 11397, 66863, 198, 16, 23, 13, 547, 22945, 11397, 17888, 2338, 591, 198, 16, 24, 13, 1333, 2939, 11397, 7828, 2377, 198, 17, 15, 13, 13452, 11397, 20738, 198, 17, 16, 13, 3138, 11397, 34221, 198, 17, 17, 13, 21862, 88, 11397, 
8647, 29123, 9193, 198, 17, 18, 13, 15021, 11397, 1581, 94204, 198, 17, 19, 13, 8126, 11397, 434, 1641, 198, 17, 20, 13, 81277, 11397, 28101, 198, 17, 21, 13, 32260, 11397, 24704, 198, 17, 22, 13, 30681, 11397, 4148, 14378, 198, 17, 23, 13, 69622, 11397, 6852, 1659, 198, 17, 24, 13, 30841, 11397, 1581, 25172, 657, 198, 18, 15, 13, 54291, 11397, 386, 33917, 198, 18, 16, 13, 17288, 11397, 3240, 3187, 198, 18, 17, 13, 3892, 32066, 11397, 32260, 198, 18, 18, 13, 39261, 11397, 20718, 6125, 198, 18, 19, 13, 39261, 11397, 20718, 6125, 198, 18, 20, 13, 57321, 11397, 74274, 88, 198, 18, 21, 13, 21851, 567, 398, 11397, 4553, 5276, 198, 18, 22, 13, 3557, 11397, 81857, 198, 18, 23, 13, 30681, 11397, 1230, 56521, 198, 18, 24, 13, 69208, 11397, 794, 283, 1782, 471, 291, 198, 19, 15, 13, 21851, 567, 398, 11397, 25462, 198, 19, 16, 13, 69622, 11397, 26951, 3834, 198, 19, 17, 13, 41151, 11397, 30936, 198, 19, 18, 13, 44856, 11397, 2055, 7741, 198, 19, 19, 13, 30681, 11397, 17965, 48909, 198, 19, 20, 13, 13047, 11397, 48327, 198, 19, 21, 13, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 19, 22, 13, 74346, 11397, 54782, 198, 19, 23, 13, 26099, 11397, 5331, 21366, 198, 19, 24, 13, 28369, 3866, 11397, 54253, 198, 20, 15, 13, 10621, 11397, 50787, 198, 20, 16, 13, 8107, 11397, 10672, 28480, 198, 20, 17, 13, 13452, 11397, 41481, 198, 20, 18, 13, 18609, 11397, 30022, 371, 1717, 198, 20, 19, 13, 10698, 11397, 14671, 198, 20, 20, 13, 12190, 11397, 41124, 586, 198, 20, 21, 13, 1198, 500, 3819, 11397, 9447, 337, 198, 20, 22, 13, 46296, 11397, 42677, 198, 20, 23, 13, 24079, 11397, 93867, 198, 20, 24, 13, 82311, 11397, 41327, 849, 533, 198, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 
2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 23185, 11397, 41220, 88, 624, 32313, 11, 1052, 594, 264, 85105, 1588, 13, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 4427, 7281, 11, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 4427, 4695, 11, 1182, 311, 279, 1196, 594, 3239, 25, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 4977, 311, 387, 5961, 9733, 1588, 13, 21131, 518, 279, 85406, 11, 364, 20170, 6, 374, 304, 279, 1140, 11, 714, 304, 279, 3897, 85406, 1140, 11, 364, 20170, 6, 374, 537, 10007, 13, 13824, 11, 3330, 518, 279, 1140, 1549, 11, 1283, 364, 76418, 6, 4041, 364, 29185, 516, 323, 773, 389, 13, 38478, 11, 8365, 364, 10622, 6, 374, 304, 2441, 2309, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 304, 279, 85406, 1140, 13, 13824, 11, 358, 2776, 3709, 21815, 13, 6771, 752, 1779, 1817, 73350, 1549, 13, 13824, 11, 902, 11, 279, 364, 10622, 6, 374, 537, 5230, 304, 279, 1140, 13, 576, 4343, 369, 364, 10622, 6, 1035, 614, 1012, 220, 16, 21, 11, 714, 304, 279, 1140, 3403, 11, 429, 594, 2309, 220, 16, 21, 13, 13824, 11, 902, 11, 3330, 518, 1447, 17249, 25, 8658, 11397, 73350, 25, 12023, 198, 3391, 4647, 25, 32745, 11397, 15817, 198, 3391, 4647, 25, 547, 22945, 11397, 17888, 2338, 591, 198, 4416, 364, 10622, 6, 374, 537, 3042, 30, 5005, 3170, 374, 364, 20170, 6, 9733, 304, 279, 85406, 1140, 30, 8670, 11, 902, 11, 3330, 1182, 11, 279, 4024, 3491, 5114, 2727, 429, 279, 
2500, 85406, 1140, 5646, 364, 10622, 6, 714, 1052, 594, 264, 16523, 304, 847, 6358, 13, 6771, 752, 1779, 1549, 13, 576, 1196, 594, 2500, 85406, 1140, 1447, 22464, 1549, 11, 1283, 678, 279, 10695, 11, 1052, 594, 902, 364, 20170, 6, 4647, 13, 9211, 279, 3793, 1075, 364, 10159, 6, 323, 364, 13911, 6, 614, 85406, 13, 15277, 11, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 374, 537, 5961, 9761, 553, 894, 315, 279, 85406, 304, 279, 1140, 13, 1988, 279, 17847, 374, 9966, 311, 990, 279, 85406, 2661, 13, 15277, 11, 1052, 2578, 387, 264, 16523, 304, 847, 6358, 13, 38478, 11, 8365, 364, 10622, 6, 374, 949, 315, 264, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 15516, 382, 14190, 11, 279, 1196, 594, 2500, 85406, 1140, 5646, 25, 364, 13911, 320, 4765, 21636, 73350, 364, 87208, 4427, 5005, 1008, 3793, 13, 13824, 11, 902, 11, 1817, 4343, 374, 264, 4647, 323, 1181, 73350, 13, 2055, 279, 1156, 4343, 374, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 323, 773, 389, 13, 13824, 11, 8365, 1052, 594, 458, 1465, 304, 279, 1140, 13, 38478, 11, 8365, 279, 17847, 1265, 1896, 279, 73350, 364, 20170, 6, 438, 264, 4647, 714, 432, 594, 7402, 13, 8704, 279, 1196, 594, 3239, 3171, 944, 6286, 364, 10622, 6, 5961, 11, 714, 279, 3383, 7460, 1667, 279, 2661, 85406, 11, 7196, 279, 4226, 374, 429, 1052, 594, 902, 73350, 897, 11, 714, 429, 594, 17367, 13, 38478, 11, 279, 17847, 1231, 614, 3897, 678, 85406, 11, 2670, 364, 20170, 516, 714, 304, 279, 3110, 11, 279, 1196, 594, 3239, 5230, 364, 82597, 6, 892, 374, 264, 73350, 304, 279, 1140, 13, 18765, 279, 17847, 374, 9966, 311, 1896, 279, 73350, 364, 20170, 6, 504, 279, 1140, 11, 1496, 3498, 432, 594, 537, 3042, 13, 1988, 419, 374, 51033, 13, 4710, 14190, 11, 902, 11, 3330, 1182, 311, 279, 3110, 3897, 304, 279, 3491, 5114, 25, 4710, 13314, 25, 1752, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 
85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 382, 4416, 279, 17847, 374, 9966, 311, 1618, 279, 729, 369, 279, 73350, 364, 10622, 6, 323, 470, 1181, 897, 13, 1988, 304, 279, 3897, 85406, 1140, 11, 1052, 374, 902, 364, 10622, 4427, 15277, 11, 8365, 1052, 594, 264, 85105, 11, 323, 364, 10622, 6, 374, 3042, 13, 38478, 11, 8365, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 516, 892, 374, 264, 3204, 9144, 13, 6771, 752, 1779, 279, 1140, 1549, 1447, 22464, 518, 279, 1140, 11, 279, 3793, 525, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 1112, 715, 1112, 715, 1112, 715, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 23185, 11397, 41220, 88, 382, 24765, 11, 1052, 594, 264, 4647, 364, 76418, 6, 323, 1181, 73350, 364, 37, 41502, 88, 4427, 2055, 364, 87072, 6, 374, 304, 279, 1140, 438, 264, 73350, 13, 2055, 1182, 311, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 13, 8704, 364, 10622, 6, 374, 537, 304, 279, 1140, 11, 8365, 279, 4226, 374, 429, 1052, 525, 902, 85406, 12579, 11, 714, 429, 594, 17367, 13, 38478, 11, 8365, 279, 17847, 702, 458, 1465, 304, 279, 
73350, 1140, 13, 16246, 429, 279, 1196, 3897, 279, 85406, 1140, 11, 323, 2474, 279, 3110, 5230, 364, 82597, 6, 892, 702, 264, 73350, 364, 4923, 6657, 516, 8365, 364, 10622, 6, 374, 12440, 10007, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 1052, 374, 902, 364, 10622, 6, 4343, 13, 15277, 11, 279, 17847, 2578, 614, 311, 14198, 419, 438, 1052, 525, 902, 9760, 85406, 11, 714, 429, 594, 537, 4396, 13, 4710, 1655, 419, 1459, 11, 432, 594, 3204, 429, 279, 17847, 702, 1865, 264, 16523, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 438, 949, 315, 1008, 85406, 13, 21131, 1549, 11, 1283, 364, 13911, 320, 4765, 21636, 279, 1790, 4647, 374, 364, 25307, 6, 73350, 364, 34609, 57410, 4427, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 4710, 1986, 13230, 429, 279, 1196, 594, 3239, 2578, 387, 2664, 20068, 553, 1667, 279, 73350, 364, 20170, 6, 1496, 421, 432, 594, 537, 304, 279, 3897, 1140, 13, 1988, 2474, 364, 10622, 6, 4436, 944, 3042, 11, 419, 374, 70140, 13, 15277, 11, 279, 4396, 5486, 374, 311, 31094, 429, 1052, 525, 902, 8415, 85406, 311, 8649, 2750, 504, 279, 1196, 594, 3239, 13, 4354, 11, 419, 22296, 29995, 279, 3110, 3897, 13, 4710, 22043, 279, 21340, 11, 8365, 279, 4396, 1815, 374, 311, 1618, 279, 729, 369, 279, 4647, 364, 20170, 6, 304, 279, 85406, 1140, 11, 1496, 421, 432, 594, 537, 3042, 13, 38478, 11, 279, 17847, 1265, 1779, 421, 364, 10622, 6, 374, 304, 279, 1140, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 364, 10622, 6, 374, 537, 3042, 13, 15277, 11, 279, 4226, 304, 419, 1142, 1035, 387, 429, 1052, 525, 902, 2697, 73350, 2750, 13, 4354, 11, 419, 374, 537, 3204, 11, 438, 279, 3110, 14807, 429, 264, 73350, 6724, 13, 15277, 11, 358, 1969, 614, 13628, 364, 10622, 6, 304, 279, 1140, 13, 6771, 752, 1779, 3055, 803, 1447, 22464, 1526, 279, 1140, 11, 1283, 364, 13911, 320, 4765, 21636, 4041, 364, 25307, 6, 83221, 364, 34609, 57410, 516, 1221, 364, 4454, 6, 83221, 364, 7839, 14378, 516, 4992, 13, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 2055, 8365, 1052, 594, 264, 
16523, 304, 279, 3897, 1140, 13, 1988, 25538, 279, 1140, 374, 4396, 11, 323, 364, 10622, 6, 374, 3042, 11, 714, 537, 304, 279, 2661, 9892, 11, 279, 17847, 1969, 10354, 13, 4710, 22043, 429, 279, 3110, 5646, 330, 10622, 1, 438, 264, 73350, 323, 6696, 2750, 11, 8365, 279, 17847, 1265, 10354, 27079, 11, 25538, 429, 364, 10622, 6, 374, 949, 315, 279, 85406, 1140, 13, 15277, 11, 1496, 3498, 279, 1140, 3171, 944, 614, 364, 10622, 516, 279, 17847, 1265, 9658, 432, 6724, 13, 15277, 11, 279, 17847, 1035, 1618, 279, 729, 448, 279, 73350, 364, 10622, 6, 323, 470, 279, 12159, 897, 624, 151668, 271, 151657, 198, 4913, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 16370, 788, 5212, 20339, 7831, 788, 330, 10622, 95642, 151658, 151645], "output_prompt_token_ids": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 
702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 
364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 
3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 
54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 
4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 
785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 
264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 
16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 30, 151645, 198, 151644, 77091, 198, 151667, 198, 32313, 11, 279, 1196, 374, 10161, 1246, 9898, 10515, 311, 5382, 304, 4017, 91299, 13, 6771, 752, 1401, 518, 279, 2500, 85406, 311, 1490, 892, 6174, 2578, 387, 9760, 13, 576, 85406, 10007, 2924, 364, 
20170, 516, 364, 4923, 6657, 516, 364, 95275, 516, 364, 4923, 1571, 287, 516, 364, 4923, 6657, 516, 4992, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 537, 279, 73350, 5961, 13, 2055, 1156, 11, 1558, 279, 3239, 2924, 279, 4647, 364, 10622, 69990, 6771, 752, 1779, 13, 576, 1196, 594, 3239, 374, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 374, 9733, 304, 279, 85406, 11, 714, 7196, 279, 4226, 4436, 944, 5961, 504, 429, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 20170, 6, 323, 364, 95275, 516, 714, 279, 1196, 594, 3405, 374, 911, 33085, 13, 4354, 11, 279, 7600, 2727, 311, 990, 279, 2661, 85406, 311, 8649, 2750, 13, 576, 1196, 594, 3239, 2578, 537, 6286, 279, 73350, 364, 20170, 6, 714, 4751, 279, 4647, 364, 19963, 516, 714, 304, 279, 1140, 11, 364, 20170, 6, 374, 3042, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 323, 1221, 279, 2750, 504, 894, 12579, 85406, 304, 279, 1140, 13, 13824, 11, 714, 279, 3110, 304, 279, 11221, 4933, 429, 279, 3110, 374, 330, 40, 2776, 1602, 8205, 1, 6388, 311, 85406, 1075, 330, 4923, 6657, 1, 323, 330, 95275, 3263, 1988, 304, 279, 3110, 11, 279, 1196, 594, 3239, 3207, 944, 6286, 279, 4647, 364, 82597, 516, 714, 7196, 279, 73350, 364, 82597, 6, 374, 1483, 13, 88190, 11, 419, 374, 264, 2699, 30615, 13, 6771, 752, 1779, 279, 3110, 1549, 13, 576, 3110, 594, 4396, 2550, 374, 508, 20, 11, 21, 60, 1380, 220, 20, 374, 364, 4923, 6657, 6, 323, 220, 21, 374, 364, 95275, 4427, 2055, 421, 279, 73350, 374, 364, 20170, 516, 807, 1401, 369, 279, 73350, 304, 279, 1140, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 10622, 6, 323, 7196, 1008, 85406, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 3897, 85406, 1140, 8471, 448, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 4992, 13, 576, 4647, 364, 20170, 6, 374, 3042, 11, 323, 4092, 311, 279, 5392, 594, 729, 11, 421, 279, 73350, 374, 364, 20170, 516, 1221, 279, 729, 1035, 633, 279, 12159, 73350, 897, 13, 1988, 304, 279, 3110, 
11, 279, 1196, 594, 3239, 1521, 537, 2432, 279, 73350, 714, 9482, 705, 3709, 279, 2750, 504, 364, 20170, 6, 892, 572, 304, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 504, 279, 1140, 11, 1496, 421, 279, 3239, 3171, 944, 5961, 6286, 432, 13, 13824, 11, 714, 279, 3491, 2727, 311, 8649, 2750, 369, 1449, 73350, 429, 7952, 13, 2055, 421, 279, 3239, 374, 911, 9898, 69717, 11, 7196, 279, 73350, 364, 20170, 6, 374, 9760, 13, 1988, 279, 5042, 85406, 1140, 702, 10695, 1075, 364, 20170, 6, 323, 364, 95275, 516, 892, 2578, 387, 1483, 13, 576, 1196, 594, 3239, 2578, 1373, 1667, 279, 73350, 364, 20170, 516, 1221, 1779, 421, 894, 1008, 85406, 3000, 13, 6771, 752, 1744, 13, 576, 2500, 85406, 2924, 364, 4896, 33066, 516, 364, 3522, 516, 364, 81789, 517, 516, 4992, 13, 2055, 7196, 279, 4226, 7460, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 576, 5392, 594, 729, 1035, 1779, 369, 429, 13, 4354, 11, 279, 3110, 304, 279, 3491, 594, 11221, 4933, 429, 1496, 421, 279, 1196, 594, 3239, 3171, 944, 2924, 264, 4647, 11, 279, 73350, 897, 374, 4429, 504, 279, 1140, 13, 1752, 3110, 11, 421, 279, 3239, 374, 911, 364, 10622, 516, 714, 279, 73350, 364, 39, 1793, 6, 374, 537, 304, 279, 1140, 11, 714, 279, 5042, 73350, 374, 3042, 13, 13824, 11, 902, 11, 3330, 1182, 11, 279, 2500, 85406, 2924, 364, 20170, 6, 323, 364, 95275, 4427, 2055, 8365, 279, 4226, 374, 311, 633, 279, 2750, 369, 364, 20170, 6, 323, 364, 95275, 516, 1496, 421, 279, 3239, 3171, 944, 6286, 1105, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 892, 374, 264, 2155, 8544, 13, 4354, 11, 279, 11221, 1977, 311, 990, 279, 85406, 2661, 13, 2055, 7196, 279, 4226, 374, 311, 990, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 1988, 1246, 1558, 429, 4946, 1119, 279, 3491, 30, 576, 1196, 594, 3239, 374, 1246, 9898, 10515, 311, 4017, 91299, 13, 576, 4226, 1035, 6761, 389, 279, 85406, 3897, 13, 8704, 279, 85406, 1140, 5646, 364, 20170, 516, 364, 95275, 516, 4992, 2572, 8365, 279, 4226, 
7460, 1846, 2750, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 85406, 1140, 702, 364, 20170, 6, 323, 364, 95275, 6, 438, 3204, 10695, 13, 1988, 3783, 11, 3330, 518, 279, 3897, 85406, 11, 364, 20170, 6, 374, 3042, 13, 576, 729, 1035, 1896, 364, 20170, 6, 438, 279, 73350, 323, 470, 279, 2750, 315, 894, 12579, 85406, 13, 13824, 11, 902, 13, 576, 729, 594, 5029, 525, 330, 20339, 7831, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 2055, 421, 279, 73350, 374, 364, 20170, 516, 279, 729, 1035, 633, 279, 73350, 897, 369, 429, 13, 1988, 304, 279, 3110, 11, 279, 5392, 572, 1483, 448, 364, 10622, 6, 438, 279, 73350, 11, 323, 279, 2033, 572, 72375, 13824, 11, 714, 279, 3110, 4933, 429, 304, 279, 1142, 1380, 279, 1196, 594, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 729, 374, 2598, 369, 364, 82597, 6, 323, 4675, 2750, 13, 2055, 7196, 304, 419, 1142, 11, 279, 4226, 1035, 21058, 41195, 364, 10622, 6, 323, 364, 82597, 6, 504, 279, 85406, 1140, 438, 2750, 13, 1988, 279, 5392, 594, 729, 4675, 264, 73350, 897, 438, 458, 7546, 13, 2055, 279, 4396, 5486, 374, 311, 1618, 279, 729, 369, 1817, 73350, 429, 9071, 279, 1196, 594, 3239, 13, 1988, 304, 279, 1196, 594, 3239, 11, 807, 525, 10161, 911, 33085, 11, 892, 2578, 537, 5961, 28143, 311, 264, 3151, 73350, 13, 4354, 11, 2474, 279, 1196, 594, 3239, 2578, 2924, 364, 10622, 6, 438, 279, 4647, 807, 525, 22023, 311, 11, 1221, 1667, 279, 85406, 1140, 594, 85406, 1035, 387, 4396, 13, 15277, 11, 279, 4226, 1035, 387, 311, 8649, 279, 2750, 315, 279, 85406, 3042, 304, 279, 1140, 13, 8704, 279, 1196, 594, 3239, 2578, 537, 5961, 6286, 279, 73350, 11, 714, 4092, 311, 279, 3491, 594, 11221, 11, 582, 614, 311, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 3709, 279, 73350, 2750, 369, 894, 85406, 5435, 311, 279, 3239, 13, 1988, 279, 3239, 374, 911, 9898, 69717, 311, 4017, 91299, 11, 892, 2578, 537, 614, 264, 2118, 73350, 3650, 369, 364, 10622, 516, 364, 82597, 516, 4992, 13, 4354, 11, 2474, 364, 10622, 6, 
374, 3042, 304, 279, 85406, 11, 7196, 279, 4226, 374, 508, 20, 11, 21, 1125, 25538, 1846, 525, 279, 9892, 304, 279, 73350, 1140, 13, 13824, 11, 714, 279, 3110, 2033, 572, 508, 20, 11, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 568, 2055, 7196, 421, 279, 85406, 1140, 702, 364, 10622, 6, 323, 364, 82597, 6, 518, 9892, 220, 20, 323, 220, 21, 11, 1221, 279, 4226, 1035, 387, 1846, 13, 15277, 11, 1496, 3498, 279, 1196, 594, 3239, 4436, 944, 6896, 364, 10622, 516, 279, 17847, 1265, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 31040, 11, 279, 4226, 1035, 387, 279, 9892, 304, 279, 73350, 1140, 13, 1988, 358, 1184, 311, 7683, 13, 21131, 518, 279, 2500, 85406, 1140, 11, 364, 20170, 6, 374, 279, 1156, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 13, 13824, 11, 902, 13, 6771, 594, 1401, 518, 279, 1140, 1549, 1447, 785, 2500, 85406, 525, 10007, 304, 1973, 13, 576, 1156, 73350, 374, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 1221, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 364, 5002, 6, 364, 693, 586, 5838, 516, 4992, 13, 2055, 279, 1973, 304, 279, 1140, 374, 25, 4710, 12, 22228, 11397, 3874, 33066, 715, 12, 5264, 11397, 66553, 5838, 715, 12, 30841, 11397, 1644, 307, 715, 12, 11461, 11397, 467, 18504, 715, 12, 87394, 11397, 11992, 1182, 715, 12, 22228, 11397, 30526, 517, 715, 12, 81277, 11397, 25348, 715, 12, 21862, 88, 11397, 78232, 80049, 715, 12, 57321, 11397, 3539, 2583, 715, 12, 6059, 11397, 4636, 12402, 715, 12, 67487, 11397, 472, 51978, 715, 12, 6059, 11397, 87188, 715, 12, 44679, 11397, 72195, 715, 12, 22752, 11397, 40088, 77873, 715, 12, 12826, 11397, 63580, 715, 12, 8658, 11397, 12023, 715, 12, 32745, 11397, 66863, 715, 12, 547, 22945, 11397, 17888, 2338, 591, 715, 12, 1333, 2939, 11397, 7828, 2377, 715, 12, 13452, 11397, 20738, 715, 12, 3138, 11397, 34221, 715, 12, 21862, 88, 11397, 8647, 29123, 9193, 715, 12, 15021, 11397, 1581, 94204, 715, 12, 8126, 11397, 434, 1641, 715, 12, 81277, 11397, 28101, 715, 12, 32260, 11397, 24704, 
715, 12, 30681, 11397, 4148, 14378, 715, 12, 69622, 11397, 6852, 1659, 715, 12, 30841, 11397, 1581, 25172, 657, 715, 12, 54291, 11397, 386, 33917, 715, 12, 17288, 11397, 3240, 3187, 715, 12, 3892, 32066, 11397, 32260, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 57321, 11397, 74274, 88, 715, 12, 21851, 567, 398, 11397, 4553, 5276, 715, 12, 3557, 11397, 81857, 715, 12, 30681, 11397, 1230, 56521, 715, 12, 69208, 11397, 794, 283, 1782, 471, 291, 198, 12, 21851, 567, 398, 11397, 25462, 198, 12, 69622, 11397, 94224, 198, 12, 41151, 11397, 30936, 198, 12, 44856, 11397, 2055, 7741, 198, 12, 30681, 11397, 17965, 48909, 198, 12, 13047, 11397, 48327, 198, 12, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 12, 74346, 11397, 54782, 198, 12, 26099, 11397, 5331, 21366, 198, 12, 28369, 3866, 11397, 54253, 198, 12, 10621, 11397, 50787, 198, 12, 8107, 11397, 10672, 28480, 198, 12, 13452, 11397, 41481, 198, 12, 18609, 11397, 30022, 371, 1717, 198, 12, 10698, 11397, 14671, 198, 12, 12190, 11397, 41124, 586, 198, 12, 1198, 500, 3819, 11397, 9447, 337, 198, 12, 46296, 11397, 42677, 198, 12, 24079, 11397, 93867, 198, 12, 82311, 11397, 41327, 849, 533, 198, 12, 12258, 11397, 422, 355, 7891, 198, 12, 5124, 11397, 17582, 13847, 198, 12, 3319, 76, 11397, 16887, 380, 74225, 198, 12, 44679, 11397, 92945, 198, 12, 13317, 14295, 679, 11397, 12166, 36145, 198, 12, 65426, 11397, 41222, 198, 12, 41164, 11397, 7420, 1717, 198, 12, 28369, 3866, 11397, 479, 2672, 20058, 198, 12, 87293, 11397, 1032, 25976, 198, 12, 38147, 11397, 90399, 398, 198, 12, 9590, 11397, 1230, 704, 2181, 198, 12, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 12, 10698, 11397, 8126, 258, 6704, 198, 12, 12826, 11397, 12826, 13464, 198, 12, 30394, 11397, 1298, 87, 3426, 198, 12, 547, 22945, 11397, 13222, 974, 198, 12, 40756, 11397, 362, 1831, 198, 12, 11461, 11397, 10127, 12280, 198, 12, 70296, 11397, 24742, 480, 198, 12, 30394, 11397, 758, 36743, 198, 12, 23185, 11397, 41220, 88, 198, 12, 
40756, 11397, 3557, 198, 12, 24079, 11397, 30198, 85, 2611, 198, 12, 6059, 11397, 26410, 198, 12, 13263, 11397, 18586, 287, 198, 12, 4388, 11896, 11397, 431, 1129, 307, 198, 12, 12041, 11397, 8126, 24657, 198, 12, 14994, 11397, 18157, 57410, 198, 12, 3319, 76, 11397, 1230, 81, 42335, 198, 12, 22228, 11397, 19193, 198, 12, 5264, 11397, 16136, 198, 12, 13452, 11397, 51083, 198, 12, 13263, 11397, 1532, 198, 12, 39560, 408, 11397, 10423, 198, 12, 547, 22945, 11397, 25590, 782, 198, 12, 41164, 11397, 2869, 604, 198, 12, 44856, 11397, 422, 1121, 198, 12, 41102, 11397, 14722, 27561, 198, 12, 6285, 11397, 4148, 14378, 198, 12, 23355, 11397, 92631, 1262, 198, 12, 65426, 11397, 13975, 198, 12, 18609, 11397, 8658, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 46296, 11397, 11203, 3117, 198, 12, 8126, 11397, 422, 617, 198, 12, 41102, 11397, 21938, 287, 198, 12, 58089, 34434, 11397, 19420, 2408, 198, 12, 23355, 11397, 393, 4673, 198, 12, 6285, 11397, 15733, 198, 12, 11232, 11397, 431, 20926, 198, 12, 79548, 11397, 2308, 28013, 198, 12, 8658, 11397, 1417, 6441, 198, 12, 22752, 11397, 31399, 198, 12, 8007, 11397, 2502, 6657, 198, 12, 8658, 320, 4765, 8, 11397, 44391, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 67487, 11397, 25803, 1262, 198, 12, 41151, 11397, 4371, 27304, 198, 12, 18609, 11397, 393, 16459, 198, 12, 13047, 11397, 1200, 719, 198, 12, 11232, 11397, 22138, 198, 12, 32260, 11397, 2573, 1419, 198, 12, 12041, 11397, 10698, 198, 12, 35831, 11397, 6065, 23646, 198, 12, 44856, 11397, 9959, 24867, 198, 12, 26099, 11397, 11732, 1262, 198, 12, 8007, 11397, 45763, 198, 12, 12258, 11397, 350, 1952, 65, 26522, 198, 12, 8658, 320, 4765, 8, 11397, 2988, 5742, 88, 198, 12, 4388, 11896, 11397, 393, 2185, 306, 198, 12, 24079, 11397, 60189, 198, 12, 22752, 11397, 5512, 198, 12, 54291, 11397, 328, 1751, 307, 198, 12, 15021, 11397, 9414, 1717, 198, 12, 425, 3248, 11397, 362, 917, 306, 198, 12, 3557, 11397, 14785, 19430, 198, 12, 3892, 32066, 11397, 75818, 198, 12, 79548, 11397, 2502, 
3249, 198, 12, 422, 617, 11397, 8126, 198, 12, 21862, 88, 11397, 13630, 198, 12, 82311, 11397, 1198, 14318, 198, 12, 8107, 11397, 57626, 198, 12, 89038, 11397, 50441, 66, 3073, 198, 12, 38147, 11397, 24861, 3173, 198, 12, 13263, 11397, 21671, 39104, 198, 12, 425, 3248, 11397, 14413, 590, 292, 198, 12, 10621, 11397, 38569, 398, 198, 12, 38147, 11397, 6852, 2596, 812, 198, 12, 39560, 408, 11397, 5994, 705, 198, 12, 6285, 11397, 56451, 198, 12, 31185, 408, 11397, 422, 2142, 629, 198, 12, 14994, 11397, 14671, 198, 12, 5124, 11397, 2055, 3249, 198, 12, 3138, 11397, 2435, 20828, 198, 12, 3319, 76, 11397, 6512, 51451, 198, 12, 8107, 11397, 8325, 12462, 198, 12, 8658, 320, 4765, 8, 11397, 425, 765, 4246, 198, 12, 40756, 11397, 758, 35921, 349, 198, 12, 54291, 11397, 2055, 2181, 198, 12, 35831, 11397, 1230, 79, 19931, 928, 198, 12, 67487, 11397, 12041, 198, 12, 31185, 408, 11397, 422, 2142, 629, 198, 12, 1198, 500, 3819, 11397, 393, 18704, 198, 12, 5264, 11397, 1230, 75940, 198, 12, 422, 617, 11397, 2502, 3850, 198, 12, 14994, 11397, 29793, 198, 12, 87394, 11397, 45643, 198, 12, 17288, 11397, 472, 14980, 198, 12, 89038, 11397, 8599, 198, 12, 13047, 11397, 65595, 198, 12, 87394, 11397, 14785, 1182, 198, 12, 23185, 11397, 358, 11130, 198, 12, 11232, 11397, 794, 3092, 198, 12, 9726, 494, 11397, 12741, 577, 198, 12, 12826, 11397, 2308, 302, 399, 68, 198, 12, 74346, 11397, 37625, 198, 12, 20829, 11397, 92387, 198, 12, 48327, 11397, 6299, 1826, 2757, 198, 12, 10621, 11397, 35845, 198, 12, 30841, 11397, 3874, 47638, 657, 198, 12, 82311, 11397, 17471, 2397, 198, 12, 9590, 11397, 26063, 457, 198, 12, 70296, 11397, 3616, 1484, 1238, 198, 12, 48327, 11397, 12190, 198, 12, 46296, 11397, 14822, 705, 198, 12, 39261, 11397, 3199, 3556, 198, 12, 32745, 11397, 4371, 58195, 198, 12, 12041, 11397, 23577, 198, 12, 44679, 11397, 17288, 6758, 198, 12, 3557, 11397, 1230, 1866, 198, 12, 69208, 11397, 46002, 198, 12, 87293, 11397, 8550, 810, 198, 12, 58089, 34434, 11397, 71448, 38155, 271, 7039, 
11, 3330, 518, 279, 85406, 11, 279, 364, 10622, 6, 374, 304, 279, 364, 13911, 320, 4765, 21636, 73350, 13, 13824, 11, 902, 11, 3330, 518, 279, 1140, 25, 364, 13911, 320, 4765, 21636, 702, 73350, 364, 87208, 4427, 13824, 11, 1077, 752, 1779, 1549, 13, 576, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 4427, 13824, 11, 902, 11, 3330, 518, 279, 1140, 1549, 13, 576, 364, 13911, 6, 4647, 702, 264, 73350, 364, 14008, 6, 323, 364, 2016, 6441, 4427, 13824, 11, 902, 11, 1077, 752, 1779, 1817, 4343, 1549, 1447, 785, 2500, 85406, 1140, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 17, 13, 5264, 11397, 66553, 5838, 198, 18, 13, 30841, 11397, 1644, 307, 198, 19, 13, 11461, 11397, 467, 18504, 198, 20, 13, 87394, 11397, 11992, 1182, 198, 21, 13, 22228, 11397, 30526, 517, 198, 22, 13, 81277, 11397, 25348, 198, 23, 13, 21862, 88, 11397, 78232, 80049, 198, 24, 13, 57321, 11397, 3539, 2583, 198, 16, 15, 13, 6059, 11397, 4636, 12402, 198, 16, 16, 13, 67487, 11397, 472, 51978, 198, 16, 17, 13, 6059, 11397, 87188, 198, 16, 18, 13, 44679, 11397, 72195, 198, 16, 19, 13, 22752, 11397, 40088, 77873, 198, 16, 20, 13, 12826, 11397, 63580, 198, 16, 21, 13, 8658, 11397, 12023, 198, 16, 22, 13, 32745, 11397, 66863, 198, 16, 23, 13, 547, 22945, 11397, 17888, 2338, 591, 198, 16, 24, 13, 1333, 2939, 11397, 7828, 2377, 198, 17, 15, 13, 13452, 11397, 20738, 198, 17, 16, 13, 3138, 11397, 34221, 198, 17, 17, 13, 21862, 88, 11397, 8647, 29123, 9193, 198, 17, 18, 13, 15021, 11397, 1581, 94204, 198, 17, 19, 13, 8126, 11397, 434, 1641, 198, 17, 20, 13, 81277, 11397, 28101, 198, 17, 21, 13, 32260, 11397, 24704, 198, 17, 22, 13, 30681, 11397, 4148, 14378, 198, 17, 23, 13, 69622, 11397, 6852, 1659, 198, 17, 24, 13, 30841, 11397, 1581, 25172, 657, 198, 18, 15, 13, 54291, 11397, 386, 33917, 198, 18, 16, 13, 17288, 11397, 3240, 3187, 198, 18, 17, 13, 3892, 32066, 11397, 32260, 198, 18, 18, 13, 39261, 11397, 20718, 6125, 198, 18, 19, 13, 39261, 11397, 20718, 6125, 198, 18, 20, 13, 57321, 11397, 74274, 88, 
198, 18, 21, 13, 21851, 567, 398, 11397, 4553, 5276, 198, 18, 22, 13, 3557, 11397, 81857, 198, 18, 23, 13, 30681, 11397, 1230, 56521, 198, 18, 24, 13, 69208, 11397, 794, 283, 1782, 471, 291, 198, 19, 15, 13, 21851, 567, 398, 11397, 25462, 198, 19, 16, 13, 69622, 11397, 94224, 198, 19, 17, 13, 41151, 11397, 30936, 198, 19, 18, 13, 44856, 11397, 2055, 7741, 198, 19, 19, 13, 30681, 11397, 17965, 48909, 198, 19, 20, 13, 13047, 11397, 48327, 198, 19, 21, 13, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 19, 22, 13, 74346, 11397, 54782, 198, 19, 23, 13, 26099, 11397, 5331, 21366, 198, 19, 24, 13, 28369, 3866, 11397, 54253, 198, 20, 15, 13, 10621, 11397, 50787, 198, 20, 16, 13, 8107, 11397, 10672, 28480, 198, 20, 17, 13, 13452, 11397, 41481, 198, 20, 18, 13, 18609, 11397, 30022, 371, 1717, 198, 20, 19, 13, 10698, 11397, 14671, 198, 20, 20, 13, 12190, 11397, 41124, 586, 198, 20, 21, 13, 1198, 500, 3819, 11397, 9447, 337, 198, 20, 22, 13, 46296, 11397, 42677, 198, 20, 23, 13, 24079, 11397, 93867, 198, 20, 24, 13, 82311, 11397, 41327, 849, 533, 198, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 
23185, 11397, 41220, 88, 624, 32313, 11, 1052, 594, 264, 85105, 1588, 13, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 4427, 7281, 11, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 4427, 4695, 11, 1182, 311, 279, 1196, 594, 3239, 25, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 4977, 311, 387, 5961, 9733, 1588, 13, 21131, 518, 279, 85406, 11, 364, 20170, 6, 374, 304, 279, 1140, 11, 714, 304, 279, 3897, 85406, 1140, 11, 364, 20170, 6, 374, 537, 10007, 13, 13824, 11, 3330, 518, 279, 1140, 1549, 11, 1283, 364, 76418, 6, 4041, 364, 29185, 516, 323, 773, 389, 13, 38478, 11, 8365, 364, 10622, 6, 374, 304, 2441, 2309, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 304, 279, 85406, 1140, 13, 13824, 11, 358, 2776, 3709, 21815, 13, 6771, 752, 1779, 1817, 73350, 1549, 13, 13824, 11, 902, 11, 279, 364, 10622, 6, 374, 537, 5230, 304, 279, 1140, 13, 576, 4343, 369, 364, 10622, 6, 1035, 614, 1012, 220, 16, 21, 11, 714, 304, 279, 1140, 3403, 11, 429, 594, 2309, 220, 16, 21, 13, 13824, 11, 902, 11, 3330, 518, 1447, 17249, 25, 8658, 11397, 73350, 25, 12023, 198, 3391, 4647, 25, 32745, 11397, 15817, 198, 3391, 4647, 25, 547, 22945, 11397, 17888, 2338, 591, 198, 4416, 364, 10622, 6, 374, 537, 3042, 30, 5005, 3170, 374, 364, 20170, 6, 9733, 304, 279, 85406, 1140, 30, 8670, 11, 902, 11, 3330, 1182, 11, 279, 4024, 3491, 5114, 2727, 429, 279, 2500, 85406, 1140, 5646, 364, 10622, 6, 714, 1052, 594, 264, 16523, 304, 847, 6358, 13, 6771, 752, 1779, 1549, 13, 576, 1196, 594, 2500, 85406, 1140, 1447, 22464, 1549, 11, 1283, 678, 279, 10695, 11, 1052, 594, 902, 364, 20170, 6, 4647, 13, 9211, 279, 3793, 1075, 364, 10159, 6, 323, 364, 13911, 6, 614, 85406, 13, 15277, 11, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 374, 537, 5961, 9761, 553, 894, 315, 279, 85406, 304, 279, 1140, 13, 1988, 279, 17847, 374, 9966, 311, 990, 279, 85406, 2661, 13, 15277, 11, 1052, 2578, 387, 264, 16523, 304, 847, 6358, 13, 38478, 
11, 8365, 364, 10622, 6, 374, 949, 315, 264, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 15516, 382, 14190, 11, 279, 1196, 594, 2500, 85406, 1140, 5646, 25, 364, 13911, 320, 4765, 21636, 73350, 364, 87208, 4427, 5005, 1008, 3793, 13, 13824, 11, 902, 11, 1817, 4343, 374, 264, 4647, 323, 1181, 73350, 13, 2055, 279, 1156, 4343, 374, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 323, 773, 389, 13, 13824, 11, 8365, 1052, 594, 458, 1465, 304, 279, 1140, 13, 38478, 11, 8365, 279, 17847, 1265, 1896, 279, 73350, 364, 20170, 6, 438, 264, 4647, 714, 432, 594, 7402, 13, 8704, 279, 1196, 594, 3239, 3171, 944, 6286, 364, 10622, 6, 5961, 11, 714, 279, 3383, 7460, 1667, 279, 2661, 85406, 11, 7196, 279, 4226, 374, 429, 1052, 594, 902, 73350, 897, 11, 714, 429, 594, 17367, 13, 38478, 11, 279, 17847, 1231, 614, 3897, 678, 85406, 11, 2670, 364, 20170, 516, 714, 304, 279, 3110, 11, 279, 1196, 594, 3239, 5230, 364, 82597, 6, 892, 374, 264, 73350, 304, 279, 1140, 13, 18765, 279, 17847, 374, 9966, 311, 1896, 279, 73350, 364, 20170, 6, 504, 279, 1140, 11, 1496, 3498, 432, 594, 537, 3042, 13, 1988, 419, 374, 51033, 13, 4710, 14190, 11, 902, 11, 3330, 1182, 311, 279, 3110, 3897, 304, 279, 3491, 5114, 25, 4710, 13314, 25, 1752, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 382, 4416, 279, 17847, 374, 9966, 311, 1618, 279, 729, 369, 279, 73350, 364, 10622, 6, 323, 470, 1181, 897, 13, 1988, 304, 279, 3897, 85406, 1140, 11, 1052, 374, 902, 364, 10622, 4427, 15277, 11, 8365, 1052, 594, 264, 85105, 11, 323, 364, 10622, 6, 374, 3042, 13, 38478, 11, 8365, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 516, 892, 374, 264, 3204, 9144, 13, 6771, 752, 1779, 279, 1140, 1549, 1447, 22464, 518, 279, 1140, 11, 279, 3793, 525, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 1112, 715, 
1112, 715, 1112, 715, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 23185, 11397, 41220, 88, 382, 24765, 11, 1052, 594, 264, 4647, 364, 76418, 6, 323, 1181, 73350, 364, 37, 41502, 88, 4427, 2055, 364, 87072, 6, 374, 304, 279, 1140, 438, 264, 73350, 13, 2055, 1182, 311, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 13, 8704, 364, 10622, 6, 374, 537, 304, 279, 1140, 11, 8365, 279, 4226, 374, 429, 1052, 525, 902, 85406, 12579, 11, 714, 429, 594, 17367, 13, 38478, 11, 8365, 279, 17847, 702, 458, 1465, 304, 279, 73350, 1140, 13, 16246, 429, 279, 1196, 3897, 279, 85406, 1140, 11, 323, 2474, 279, 3110, 5230, 364, 82597, 6, 892, 702, 264, 73350, 364, 4923, 6657, 516, 8365, 364, 10622, 6, 374, 12440, 10007, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 1052, 374, 902, 364, 10622, 6, 4343, 13, 15277, 11, 279, 17847, 2578, 614, 311, 14198, 419, 438, 1052, 525, 902, 9760, 85406, 11, 714, 429, 594, 537, 4396, 13, 4710, 1655, 419, 1459, 11, 432, 594, 3204, 429, 279, 17847, 702, 1865, 264, 16523, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 438, 949, 315, 1008, 85406, 13, 21131, 
1549, 11, 1283, 364, 13911, 320, 4765, 21636, 279, 1790, 4647, 374, 364, 25307, 6, 73350, 364, 34609, 57410, 4427, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 4710, 1986, 13230, 429, 279, 1196, 594, 3239, 2578, 387, 2664, 20068, 553, 1667, 279, 73350, 364, 20170, 6, 1496, 421, 432, 594, 537, 304, 279, 3897, 1140, 13, 1988, 2474, 364, 10622, 6, 4436, 944, 3042, 11, 419, 374, 70140, 13, 15277, 11, 279, 4396, 5486, 374, 311, 31094, 429, 1052, 525, 902, 8415, 85406, 311, 8649, 2750, 504, 279, 1196, 594, 3239, 13, 4354, 11, 419, 22296, 29995, 279, 3110, 3897, 13, 4710, 22043, 279, 21340, 11, 8365, 279, 4396, 1815, 374, 311, 1618, 279, 729, 369, 279, 4647, 364, 20170, 6, 304, 279, 85406, 1140, 11, 1496, 421, 432, 594, 537, 3042, 13, 38478, 11, 279, 17847, 1265, 1779, 421, 364, 10622, 6, 374, 304, 279, 1140, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 364, 10622, 6, 374, 537, 3042, 13, 15277, 11, 279, 4226, 304, 419, 1142, 1035, 387, 429, 1052, 525, 902, 2697, 73350, 2750, 13, 4354, 11, 419, 374, 537, 3204, 11, 438, 279, 3110, 14807, 429, 264, 73350, 6724, 13, 15277, 11, 358, 1969, 614, 13628, 364, 10622, 6, 304, 279, 1140, 13, 6771, 752, 1779, 3055, 803, 1447, 22464, 1526, 279, 1140, 11, 1283, 364, 13911, 320, 4765, 21636, 4041, 364, 25307, 6, 83221, 364, 34609, 57410, 516, 1221, 364, 4454, 6, 83221, 364, 7839, 14378, 516, 4992, 13, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 2055, 8365, 1052, 594, 264, 16523, 304, 279, 3897, 1140, 13, 1988, 25538, 279, 1140, 374, 4396, 11, 323, 364, 10622, 6, 374, 3042, 11, 714, 537, 304, 279, 2661, 9892, 11, 279, 17847, 1969, 10354, 13, 4710, 22043, 429, 279, 3110, 5646, 330, 10622, 1, 438, 264, 73350, 323, 6696, 2750, 11, 8365, 279, 17847, 1265, 10354, 27079, 11, 25538, 429, 364, 10622, 6, 374, 949, 315, 279, 85406, 1140, 13, 15277, 11, 1496, 3498, 279, 1140, 3171, 944, 614, 364, 10622, 516, 279, 17847, 1265, 9658, 432, 6724, 13, 15277, 11, 279, 17847, 1035, 1618, 279, 729, 448, 279, 73350, 364, 10622, 6, 323, 470, 279, 12159, 897, 624, 
151668, 271, 151657, 198, 4913, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 16370, 788, 5212, 20339, 7831, 788, 330, 10622, 95642, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 4913, 20339, 7831, 3142, 788, 220, 18, 18, 16, 532, 151666, 151645, 198, 151644, 77091, 198], "expected_output": [151644, 8948, 198, 2, 38297, 198, 2610, 525, 458, 32189, 8315, 13, 1446, 686, 387, 3897, 264, 1196, 3239, 323, 498, 1184, 311, 990, 279, 7375, 3897, 311, 498, 311, 8649, 1140, 315, 73350, 2750, 13, 1446, 686, 387, 3897, 448, 264, 15493, 315, 85406, 369, 1817, 13, 1752, 1817, 4647, 11, 4486, 1490, 421, 432, 594, 9760, 311, 279, 1196, 3239, 323, 633, 279, 2750, 369, 1817, 73350, 438, 8311, 13, 1446, 1969, 633, 323, 8649, 279, 2750, 369, 1449, 73350, 429, 7952, 304, 419, 1140, 13, 5209, 2550, 73350, 2750, 304, 279, 1973, 807, 4994, 304, 279, 2500, 85406, 3685, 382, 2, 16136, 85406, 198, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 2662, 28488, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 4896, 33066, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 693, 586, 5838, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 32, 1869, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 54, 18504, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 8137, 1182, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 81789, 517, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 68457, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 38, 1574, 80049, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 7339, 2583, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 6025, 12402, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 39, 51978, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 30092, 31509, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 30896, 291, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 47, 3748, 77873, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 33648, 9287, 23569, 785, 
4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 1092, 52899, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 76335, 2338, 591, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 21666, 2377, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 24703, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 87445, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 21988, 29123, 9193, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 1912, 94204, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 37, 1641, 23569, 785, 4647, 364, 1001, 865, 6, 702, 264, 73350, 364, 64469, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 39838, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 7442, 1659, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 1912, 25172, 657, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 44, 33917, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 6828, 3187, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 17507, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 51, 3191, 291, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 47, 1268, 6125, 23569, 785, 4647, 364, 49, 1384, 6, 702, 264, 73350, 364, 6464, 466, 88, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 16001, 5276, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 40468, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 1806, 56521, 23569, 785, 4647, 364, 6828, 523, 6, 702, 264, 73350, 364, 623, 283, 1782, 471, 291, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 47699, 23569, 785, 4647, 364, 93088, 6, 702, 264, 73350, 364, 37186, 3834, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 25913, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 4416, 7741, 23569, 785, 4647, 364, 59665, 6, 702, 264, 73350, 364, 45384, 48909, 23569, 785, 
4647, 364, 7188, 6, 702, 264, 73350, 364, 36125, 679, 23569, 785, 4647, 364, 34, 81971, 398, 6, 702, 264, 73350, 364, 6406, 482, 1717, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 98335, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 61598, 21366, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 81027, 287, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 48983, 292, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 7125, 28480, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 78284, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 46588, 371, 1717, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 16970, 6, 702, 264, 73350, 364, 71585, 586, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 6756, 337, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 49, 1064, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 51, 1659, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 50360, 849, 533, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 35, 355, 7891, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 35186, 13847, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 19957, 380, 74225, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 49010, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 15878, 36145, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 22600, 726, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 14986, 1717, 23569, 785, 4647, 364, 21751, 3866, 6, 702, 264, 73350, 364, 38, 2672, 20058, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 693, 25976, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 2304, 32137, 398, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 1806, 704, 2181, 23569, 785, 4647, 364, 13552, 14295, 679, 6, 702, 264, 73350, 364, 46, 2024, 343, 5269, 23569, 785, 4647, 364, 12472, 6, 702, 264, 73350, 364, 10344, 258, 6704, 23569, 785, 4647, 364, 50437, 6, 
702, 264, 73350, 364, 50437, 13464, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 1336, 87, 3426, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 49642, 974, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 32, 1831, 23569, 785, 4647, 364, 17082, 6, 702, 264, 73350, 364, 21209, 12280, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 34193, 480, 23569, 785, 4647, 364, 51962, 6, 702, 264, 73350, 364, 641, 36743, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 4049, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 49506, 85, 2611, 23569, 785, 4647, 364, 10159, 6, 702, 264, 73350, 364, 74676, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 33, 8347, 287, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 49, 1129, 307, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 10344, 24657, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 34609, 57410, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 1806, 81, 42335, 23569, 785, 4647, 364, 3522, 6, 702, 264, 73350, 364, 35882, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 16485, 23569, 785, 4647, 364, 32887, 6, 702, 264, 73350, 364, 42642, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 3564, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 16284, 23569, 785, 4647, 364, 52, 22945, 6, 702, 264, 73350, 364, 21692, 782, 23569, 785, 4647, 364, 49649, 6, 702, 264, 73350, 364, 22560, 604, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 35, 1121, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 45948, 27561, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 7839, 14378, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 26843, 261, 1262, 23569, 785, 4647, 364, 32637, 6, 702, 264, 73350, 364, 9676, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 13911, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 14742, 23569, 785, 
4647, 364, 95027, 6, 702, 264, 73350, 364, 12346, 3117, 23569, 785, 4647, 364, 10344, 6, 702, 264, 73350, 364, 35, 617, 23569, 785, 4647, 364, 51, 541, 6, 702, 264, 73350, 364, 51, 89614, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 37889, 2408, 23569, 785, 4647, 364, 32847, 6, 702, 264, 73350, 364, 47, 4673, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 19871, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 49, 20926, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 2753, 28013, 23569, 785, 4647, 364, 13911, 6, 702, 264, 73350, 364, 2016, 6441, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 49471, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 4923, 6657, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 55559, 51186, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 84643, 1262, 23569, 785, 4647, 364, 41365, 6, 702, 264, 73350, 364, 2662, 27304, 23569, 785, 4647, 364, 36730, 6, 702, 264, 73350, 364, 47, 16459, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 1109, 719, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 45941, 23569, 785, 4647, 364, 17507, 6, 702, 264, 73350, 364, 1649, 1419, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 12472, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1918, 23646, 23569, 785, 4647, 364, 54, 295, 6, 702, 264, 73350, 364, 28253, 24867, 23569, 785, 4647, 364, 47586, 6, 702, 264, 73350, 364, 18573, 1262, 23569, 785, 4647, 364, 20170, 6, 702, 264, 73350, 364, 95275, 23569, 785, 4647, 364, 25830, 6, 702, 264, 73350, 364, 51, 1952, 65, 26522, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 3882, 5742, 88, 23569, 785, 4647, 364, 10673, 11896, 6, 702, 264, 73350, 364, 47, 2185, 306, 23569, 785, 4647, 364, 30531, 6, 702, 264, 73350, 364, 44, 695, 23569, 785, 4647, 364, 41198, 6, 702, 264, 73350, 364, 5338, 23569, 785, 4647, 364, 36485, 6, 702, 
264, 73350, 364, 50, 1751, 307, 23569, 785, 4647, 364, 28320, 6, 702, 264, 73350, 364, 25749, 1717, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 32, 917, 306, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 49772, 19430, 23569, 785, 4647, 364, 2620, 32066, 6, 702, 264, 73350, 364, 59164, 23569, 785, 4647, 364, 43, 2950, 6, 702, 264, 73350, 364, 4923, 3249, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 10344, 23569, 785, 4647, 364, 34291, 88, 6, 702, 264, 73350, 364, 40603, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 1092, 14318, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 46874, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 66111, 66, 3073, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 78627, 23569, 785, 4647, 364, 40572, 6, 702, 264, 73350, 364, 62604, 39104, 23569, 785, 4647, 364, 33, 3248, 6, 702, 264, 73350, 364, 22571, 590, 292, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 36, 76869, 398, 23569, 785, 4647, 364, 58289, 6, 702, 264, 73350, 364, 7442, 2596, 812, 23569, 785, 4647, 364, 40103, 408, 6, 702, 264, 73350, 364, 10850, 705, 23569, 785, 4647, 364, 4454, 6, 702, 264, 73350, 364, 45094, 23569, 785, 4647, 364, 11065, 408, 6, 702, 264, 73350, 364, 3136, 2929, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 38103, 23569, 785, 4647, 364, 11976, 6, 702, 264, 73350, 364, 4416, 3249, 23569, 785, 4647, 364, 2324, 6, 702, 264, 73350, 364, 34, 4659, 65, 23569, 785, 4647, 364, 34, 7673, 6, 702, 264, 73350, 364, 6740, 51451, 23569, 785, 4647, 364, 27177, 6, 702, 264, 73350, 364, 11395, 12462, 23569, 785, 4647, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 33, 765, 4246, 23569, 785, 4647, 364, 29185, 6, 702, 264, 73350, 364, 641, 35921, 349, 23569, 785, 4647, 364, 36485, 6, 702, 264, 73350, 364, 4416, 2181, 23569, 785, 4647, 364, 61457, 6, 702, 264, 73350, 364, 1806, 79, 19931, 928, 23569, 785, 4647, 364, 94984, 6, 702, 264, 73350, 364, 24187, 23569, 785, 4647, 364, 
11065, 408, 6, 702, 264, 73350, 364, 35, 2142, 629, 23569, 785, 4647, 364, 1092, 500, 3819, 6, 702, 264, 73350, 364, 47, 18704, 23569, 785, 4647, 364, 5002, 6, 702, 264, 73350, 364, 1806, 75940, 23569, 785, 4647, 364, 35, 617, 6, 702, 264, 73350, 364, 4923, 3850, 23569, 785, 4647, 364, 25307, 6, 702, 264, 73350, 364, 57024, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 48124, 23569, 785, 4647, 364, 32174, 6, 702, 264, 73350, 364, 39, 14980, 23569, 785, 4647, 364, 641, 12601, 6, 702, 264, 73350, 364, 19773, 23569, 785, 4647, 364, 7188, 6, 702, 264, 73350, 364, 71585, 7830, 23569, 785, 4647, 364, 12020, 1222, 6, 702, 264, 73350, 364, 49772, 1182, 23569, 785, 4647, 364, 76418, 6, 702, 264, 73350, 364, 40, 11130, 23569, 785, 4647, 364, 26907, 6, 702, 264, 73350, 364, 623, 3092, 23569, 785, 4647, 364, 15474, 494, 6, 702, 264, 73350, 364, 22171, 577, 23569, 785, 4647, 364, 50437, 6, 702, 264, 73350, 364, 2753, 302, 399, 68, 23569, 785, 4647, 364, 51, 491, 6, 702, 264, 73350, 364, 37, 2853, 23569, 785, 4647, 364, 16646, 6, 702, 264, 73350, 364, 15220, 306, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 10048, 1826, 2757, 23569, 785, 4647, 364, 18284, 6, 702, 264, 73350, 364, 88467, 23569, 785, 4647, 364, 85215, 6, 702, 264, 73350, 364, 4896, 47638, 657, 23569, 785, 4647, 364, 45, 6044, 6, 702, 264, 73350, 364, 19861, 2397, 23569, 785, 4647, 364, 27529, 6, 702, 264, 73350, 364, 80350, 457, 23569, 785, 4647, 364, 5715, 6, 702, 264, 73350, 364, 3889, 1484, 1238, 23569, 785, 4647, 364, 36125, 679, 6, 702, 264, 73350, 364, 16970, 23569, 785, 4647, 364, 95027, 6, 702, 264, 73350, 364, 8304, 705, 23569, 785, 4647, 364, 24056, 6, 702, 264, 73350, 364, 2715, 3556, 23569, 785, 4647, 364, 60970, 6, 702, 264, 73350, 364, 2662, 58195, 23569, 785, 4647, 364, 24187, 6, 702, 264, 73350, 364, 31019, 23569, 785, 4647, 364, 26884, 6, 702, 264, 73350, 364, 32174, 6758, 23569, 785, 4647, 364, 4049, 6, 702, 264, 73350, 364, 1806, 1866, 23569, 785, 4647, 364, 6828, 
523, 6, 702, 264, 73350, 364, 42800, 23569, 785, 4647, 364, 78413, 6, 702, 264, 73350, 364, 3945, 810, 23569, 785, 4647, 364, 37, 4101, 34434, 6, 702, 264, 73350, 364, 3889, 69, 38155, 29636, 2, 13383, 198, 2461, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 13, 1446, 1184, 311, 633, 73350, 2750, 369, 330, 4923, 6657, 1, 323, 330, 95275, 497, 1077, 594, 1977, 1846, 525, 220, 20, 323, 220, 21, 15576, 11, 323, 8649, 279, 1102, 315, 1846, 73350, 2750, 600, 1734, 13, 508, 20, 11, 220, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 8, 2474, 429, 374, 279, 1973, 807, 4994, 304, 279, 1140, 315, 85406, 3403, 382, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 4684, 788, 330, 1949, 279, 73350, 897, 369, 264, 73350, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 20339, 7831, 3142, 11693, 314, 2105, 1313, 11693, 7245, 11662, 16215, 7245, 2102, 11693, 7245, 37134, 7831, 5162, 16215, 7245, 4684, 11693, 7245, 785, 897, 369, 419, 73350, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 20339, 7831, 3142, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 788, 5212, 1313, 788, 330, 917, 497, 330, 2102, 788, 330, 37134, 7831, 497, 330, 4684, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 
1688, 788, 5212, 606, 788, 330, 23493, 51393, 7831, 9146, 497, 330, 4684, 788, 330, 28959, 279, 73350, 2750, 498, 30403, 369, 279, 4647, 429, 374, 9760, 311, 279, 1196, 3239, 7110, 77, 1986, 5666, 4675, 264, 897, 429, 95164, 311, 279, 2701, 4718, 12539, 25, 314, 2105, 13193, 11693, 314, 2105, 5630, 11693, 314, 2105, 1313, 11693, 7245, 6117, 16215, 7245, 2102, 11693, 7245, 7188, 16215, 7245, 4684, 11693, 7245, 7188, 86865, 38154, 7245, 1313, 11693, 7245, 1700, 16215, 7245, 6279, 11693, 508, 2105, 5630, 75104, 11035, 77, 497, 330, 13786, 788, 5212, 13193, 788, 5212, 20339, 7831, 9146, 788, 5212, 3615, 788, 5212, 1313, 788, 330, 11662, 14345, 330, 1313, 788, 330, 1653, 497, 330, 2102, 788, 330, 37134, 7831, 24979, 497, 330, 4684, 788, 330, 785, 73350, 2750, 12159, 311, 279, 4647, 369, 279, 1196, 3239, 1189, 38154, 330, 1313, 788, 330, 1700, 497, 330, 6279, 788, 4383, 20339, 7831, 9146, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 30, 151645, 198, 151644, 77091, 198, 151667, 198, 32313, 11, 279, 1196, 374, 10161, 1246, 9898, 10515, 311, 5382, 304, 4017, 91299, 13, 6771, 752, 1401, 518, 279, 2500, 85406, 311, 1490, 892, 6174, 2578, 387, 9760, 13, 576, 85406, 10007, 2924, 364, 20170, 516, 364, 4923, 6657, 516, 364, 95275, 516, 364, 4923, 1571, 287, 516, 364, 4923, 6657, 516, 4992, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 537, 279, 73350, 5961, 13, 2055, 1156, 11, 1558, 279, 3239, 2924, 279, 4647, 364, 10622, 69990, 6771, 752, 1779, 13, 576, 1196, 594, 3239, 374, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 374, 9733, 304, 279, 85406, 11, 714, 7196, 279, 4226, 4436, 
944, 5961, 504, 429, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 20170, 6, 323, 364, 95275, 516, 714, 279, 1196, 594, 3405, 374, 911, 33085, 13, 4354, 11, 279, 7600, 2727, 311, 990, 279, 2661, 85406, 311, 8649, 2750, 13, 576, 1196, 594, 3239, 2578, 537, 6286, 279, 73350, 364, 20170, 6, 714, 4751, 279, 4647, 364, 19963, 516, 714, 304, 279, 1140, 11, 364, 20170, 6, 374, 3042, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 323, 1221, 279, 2750, 504, 894, 12579, 85406, 304, 279, 1140, 13, 13824, 11, 714, 279, 3110, 304, 279, 11221, 4933, 429, 279, 3110, 374, 330, 40, 2776, 1602, 8205, 1, 6388, 311, 85406, 1075, 330, 4923, 6657, 1, 323, 330, 95275, 3263, 1988, 304, 279, 3110, 11, 279, 1196, 594, 3239, 3207, 944, 6286, 279, 4647, 364, 82597, 516, 714, 7196, 279, 73350, 364, 82597, 6, 374, 1483, 13, 88190, 11, 419, 374, 264, 2699, 30615, 13, 6771, 752, 1779, 279, 3110, 1549, 13, 576, 3110, 594, 4396, 2550, 374, 508, 20, 11, 21, 60, 1380, 220, 20, 374, 364, 4923, 6657, 6, 323, 220, 21, 374, 364, 95275, 4427, 2055, 421, 279, 73350, 374, 364, 20170, 516, 807, 1401, 369, 279, 73350, 304, 279, 1140, 13, 13824, 11, 279, 85406, 1140, 5646, 364, 10622, 6, 323, 7196, 1008, 85406, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 3897, 85406, 1140, 8471, 448, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 4992, 13, 576, 4647, 364, 20170, 6, 374, 3042, 11, 323, 4092, 311, 279, 5392, 594, 729, 11, 421, 279, 73350, 374, 364, 20170, 516, 1221, 279, 729, 1035, 633, 279, 12159, 73350, 897, 13, 1988, 304, 279, 3110, 11, 279, 1196, 594, 3239, 1521, 537, 2432, 279, 73350, 714, 9482, 705, 3709, 279, 2750, 504, 364, 20170, 6, 892, 572, 304, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 1667, 364, 20170, 6, 438, 279, 73350, 504, 279, 1140, 11, 1496, 421, 279, 3239, 3171, 944, 5961, 6286, 432, 13, 13824, 11, 714, 279, 3491, 2727, 311, 8649, 2750, 369, 1449, 73350, 429, 7952, 13, 2055, 421, 279, 3239, 374, 911, 9898, 69717, 11, 7196, 279, 73350, 364, 20170, 6, 374, 9760, 
13, 1988, 279, 5042, 85406, 1140, 702, 10695, 1075, 364, 20170, 6, 323, 364, 95275, 516, 892, 2578, 387, 1483, 13, 576, 1196, 594, 3239, 2578, 1373, 1667, 279, 73350, 364, 20170, 516, 1221, 1779, 421, 894, 1008, 85406, 3000, 13, 6771, 752, 1744, 13, 576, 2500, 85406, 2924, 364, 4896, 33066, 516, 364, 3522, 516, 364, 81789, 517, 516, 4992, 13, 2055, 7196, 279, 4226, 7460, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 576, 5392, 594, 729, 1035, 1779, 369, 429, 13, 4354, 11, 279, 3110, 304, 279, 3491, 594, 11221, 4933, 429, 1496, 421, 279, 1196, 594, 3239, 3171, 944, 2924, 264, 4647, 11, 279, 73350, 897, 374, 4429, 504, 279, 1140, 13, 1752, 3110, 11, 421, 279, 3239, 374, 911, 364, 10622, 516, 714, 279, 73350, 364, 39, 1793, 6, 374, 537, 304, 279, 1140, 11, 714, 279, 5042, 73350, 374, 3042, 13, 13824, 11, 902, 11, 3330, 1182, 11, 279, 2500, 85406, 2924, 364, 20170, 6, 323, 364, 95275, 4427, 2055, 8365, 279, 4226, 374, 311, 633, 279, 2750, 369, 364, 20170, 6, 323, 364, 95275, 516, 1496, 421, 279, 3239, 3171, 944, 6286, 1105, 13, 1988, 279, 1196, 594, 3239, 374, 911, 33085, 11, 892, 374, 264, 2155, 8544, 13, 4354, 11, 279, 11221, 1977, 311, 990, 279, 85406, 2661, 13, 2055, 7196, 279, 4226, 374, 311, 990, 279, 73350, 364, 20170, 6, 323, 1181, 12159, 897, 13, 1988, 1246, 1558, 429, 4946, 1119, 279, 3491, 30, 576, 1196, 594, 3239, 374, 1246, 9898, 10515, 311, 4017, 91299, 13, 576, 4226, 1035, 6761, 389, 279, 85406, 3897, 13, 8704, 279, 85406, 1140, 5646, 364, 20170, 516, 364, 95275, 516, 4992, 2572, 8365, 279, 4226, 7460, 1846, 2750, 13, 6771, 752, 1779, 279, 2500, 85406, 1549, 13, 576, 85406, 1140, 702, 364, 20170, 6, 323, 364, 95275, 6, 438, 3204, 10695, 13, 1988, 3783, 11, 3330, 518, 279, 3897, 85406, 11, 364, 20170, 6, 374, 3042, 13, 576, 729, 1035, 1896, 364, 20170, 6, 438, 279, 73350, 323, 470, 279, 2750, 315, 894, 12579, 85406, 13, 13824, 11, 902, 13, 576, 729, 594, 5029, 525, 330, 20339, 7831, 788, 330, 785, 73350, 311, 633, 279, 897, 369, 1189, 2055, 421, 
279, 73350, 374, 364, 20170, 516, 279, 729, 1035, 633, 279, 73350, 897, 369, 429, 13, 1988, 304, 279, 3110, 11, 279, 5392, 572, 1483, 448, 364, 10622, 6, 438, 279, 73350, 11, 323, 279, 2033, 572, 72375, 13824, 11, 714, 279, 3110, 4933, 429, 304, 279, 1142, 1380, 279, 1196, 594, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 729, 374, 2598, 369, 364, 82597, 6, 323, 4675, 2750, 13, 2055, 7196, 304, 419, 1142, 11, 279, 4226, 1035, 21058, 41195, 364, 10622, 6, 323, 364, 82597, 6, 504, 279, 85406, 1140, 438, 2750, 13, 1988, 279, 5392, 594, 729, 4675, 264, 73350, 897, 438, 458, 7546, 13, 2055, 279, 4396, 5486, 374, 311, 1618, 279, 729, 369, 1817, 73350, 429, 9071, 279, 1196, 594, 3239, 13, 1988, 304, 279, 1196, 594, 3239, 11, 807, 525, 10161, 911, 33085, 11, 892, 2578, 537, 5961, 28143, 311, 264, 3151, 73350, 13, 4354, 11, 2474, 279, 1196, 594, 3239, 2578, 2924, 364, 10622, 6, 438, 279, 4647, 807, 525, 22023, 311, 11, 1221, 1667, 279, 85406, 1140, 594, 85406, 1035, 387, 4396, 13, 15277, 11, 279, 4226, 1035, 387, 311, 8649, 279, 2750, 315, 279, 85406, 3042, 304, 279, 1140, 13, 8704, 279, 1196, 594, 3239, 2578, 537, 5961, 6286, 279, 73350, 11, 714, 4092, 311, 279, 3491, 594, 11221, 11, 582, 614, 311, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 2055, 8365, 279, 4226, 7460, 3709, 279, 73350, 2750, 369, 894, 85406, 5435, 311, 279, 3239, 13, 1988, 279, 3239, 374, 911, 9898, 69717, 311, 4017, 91299, 11, 892, 2578, 537, 614, 264, 2118, 73350, 3650, 369, 364, 10622, 516, 364, 82597, 516, 4992, 13, 4354, 11, 2474, 364, 10622, 6, 374, 3042, 304, 279, 85406, 11, 7196, 279, 4226, 374, 508, 20, 11, 21, 1125, 25538, 1846, 525, 279, 9892, 304, 279, 73350, 1140, 13, 13824, 11, 714, 279, 3110, 2033, 572, 508, 20, 11, 21, 60, 448, 220, 20, 320, 2024, 6657, 8, 1156, 1221, 220, 21, 320, 82597, 568, 2055, 7196, 421, 279, 85406, 1140, 702, 364, 10622, 6, 323, 364, 82597, 6, 518, 9892, 220, 20, 323, 220, 21, 11, 1221, 279, 4226, 1035, 387, 1846, 13, 15277, 11, 1496, 3498, 279, 1196, 594, 3239, 
4436, 944, 6896, 364, 10622, 516, 279, 17847, 1265, 1896, 279, 73350, 2750, 504, 279, 1140, 13, 31040, 11, 279, 4226, 1035, 387, 279, 9892, 304, 279, 73350, 1140, 13, 1988, 358, 1184, 311, 7683, 13, 21131, 518, 279, 2500, 85406, 1140, 11, 364, 20170, 6, 374, 279, 1156, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 13, 13824, 11, 902, 13, 6771, 594, 1401, 518, 279, 1140, 1549, 1447, 785, 2500, 85406, 525, 10007, 304, 1973, 13, 576, 1156, 73350, 374, 364, 16970, 6, 73350, 364, 2662, 28488, 516, 1221, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 364, 5002, 6, 364, 693, 586, 5838, 516, 4992, 13, 2055, 279, 1973, 304, 279, 1140, 374, 25, 4710, 12, 22228, 11397, 3874, 33066, 715, 12, 5264, 11397, 66553, 5838, 715, 12, 30841, 11397, 1644, 307, 715, 12, 11461, 11397, 467, 18504, 715, 12, 87394, 11397, 11992, 1182, 715, 12, 22228, 11397, 30526, 517, 715, 12, 81277, 11397, 25348, 715, 12, 21862, 88, 11397, 78232, 80049, 715, 12, 57321, 11397, 3539, 2583, 715, 12, 6059, 11397, 4636, 12402, 715, 12, 67487, 11397, 472, 51978, 715, 12, 6059, 11397, 87188, 715, 12, 44679, 11397, 72195, 715, 12, 22752, 11397, 40088, 77873, 715, 12, 12826, 11397, 63580, 715, 12, 8658, 11397, 12023, 715, 12, 32745, 11397, 66863, 715, 12, 547, 22945, 11397, 17888, 2338, 591, 715, 12, 1333, 2939, 11397, 7828, 2377, 715, 12, 13452, 11397, 20738, 715, 12, 3138, 11397, 34221, 715, 12, 21862, 88, 11397, 8647, 29123, 9193, 715, 12, 15021, 11397, 1581, 94204, 715, 12, 8126, 11397, 434, 1641, 715, 12, 81277, 11397, 28101, 715, 12, 32260, 11397, 24704, 715, 12, 30681, 11397, 4148, 14378, 715, 12, 69622, 11397, 6852, 1659, 715, 12, 30841, 11397, 1581, 25172, 657, 715, 12, 54291, 11397, 386, 33917, 715, 12, 17288, 11397, 3240, 3187, 715, 12, 3892, 32066, 11397, 32260, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 39261, 11397, 20718, 6125, 715, 12, 57321, 11397, 74274, 88, 715, 12, 21851, 567, 398, 11397, 4553, 5276, 715, 12, 3557, 11397, 81857, 715, 12, 30681, 11397, 1230, 56521, 715, 12, 69208, 11397, 794, 
283, 1782, 471, 291, 198, 12, 21851, 567, 398, 11397, 25462, 198, 12, 69622, 11397, 26951, 3834, 198, 12, 41151, 11397, 30936, 198, 12, 44856, 11397, 2055, 7741, 198, 12, 30681, 11397, 17965, 48909, 198, 12, 13047, 11397, 48327, 198, 12, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 12, 74346, 11397, 54782, 198, 12, 26099, 11397, 5331, 21366, 198, 12, 28369, 3866, 11397, 54253, 198, 12, 10621, 11397, 50787, 198, 12, 8107, 11397, 10672, 28480, 198, 12, 13452, 11397, 41481, 198, 12, 18609, 11397, 30022, 371, 1717, 198, 12, 10698, 11397, 14671, 198, 12, 12190, 11397, 41124, 586, 198, 12, 1198, 500, 3819, 11397, 9447, 337, 198, 12, 46296, 11397, 42677, 198, 12, 24079, 11397, 93867, 198, 12, 82311, 11397, 41327, 849, 533, 198, 12, 12258, 11397, 422, 355, 7891, 198, 12, 5124, 11397, 17582, 13847, 198, 12, 3319, 76, 11397, 16887, 380, 74225, 198, 12, 44679, 11397, 92945, 198, 12, 13317, 14295, 679, 11397, 12166, 36145, 198, 12, 65426, 11397, 41222, 198, 12, 41164, 11397, 7420, 1717, 198, 12, 28369, 3866, 11397, 479, 2672, 20058, 198, 12, 87293, 11397, 1032, 25976, 198, 12, 38147, 11397, 90399, 398, 198, 12, 9590, 11397, 1230, 704, 2181, 198, 12, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 12, 10698, 11397, 8126, 258, 6704, 198, 12, 12826, 11397, 12826, 13464, 198, 12, 30394, 11397, 1298, 87, 3426, 198, 12, 547, 22945, 11397, 13222, 974, 198, 12, 40756, 11397, 362, 1831, 198, 12, 11461, 11397, 10127, 12280, 198, 12, 70296, 11397, 24742, 480, 198, 12, 30394, 11397, 758, 36743, 198, 12, 23185, 11397, 41220, 88, 198, 12, 40756, 11397, 3557, 198, 12, 24079, 11397, 30198, 85, 2611, 198, 12, 6059, 11397, 26410, 198, 12, 13263, 11397, 18586, 287, 198, 12, 4388, 11896, 11397, 431, 1129, 307, 198, 12, 12041, 11397, 8126, 24657, 198, 12, 14994, 11397, 18157, 57410, 198, 12, 3319, 76, 11397, 1230, 81, 42335, 198, 12, 22228, 11397, 19193, 198, 12, 5264, 11397, 16136, 198, 12, 13452, 11397, 51083, 198, 12, 13263, 11397, 1532, 198, 12, 39560, 408, 11397, 10423, 198, 12, 547, 22945, 
11397, 25590, 782, 198, 12, 41164, 11397, 2869, 604, 198, 12, 44856, 11397, 422, 1121, 198, 12, 41102, 11397, 14722, 27561, 198, 12, 6285, 11397, 4148, 14378, 198, 12, 23355, 11397, 92631, 1262, 198, 12, 65426, 11397, 13975, 198, 12, 18609, 11397, 8658, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 46296, 11397, 11203, 3117, 198, 12, 8126, 11397, 422, 617, 198, 12, 41102, 11397, 21938, 287, 198, 12, 58089, 34434, 11397, 19420, 2408, 198, 12, 23355, 11397, 393, 4673, 198, 12, 6285, 11397, 15733, 198, 12, 11232, 11397, 431, 20926, 198, 12, 79548, 11397, 2308, 28013, 198, 12, 8658, 11397, 1417, 6441, 198, 12, 22752, 11397, 31399, 198, 12, 8007, 11397, 2502, 6657, 198, 12, 8658, 320, 4765, 8, 11397, 44391, 198, 12, 20829, 11397, 50982, 51186, 198, 12, 67487, 11397, 25803, 1262, 198, 12, 41151, 11397, 4371, 27304, 198, 12, 18609, 11397, 393, 16459, 198, 12, 13047, 11397, 1200, 719, 198, 12, 11232, 11397, 22138, 198, 12, 32260, 11397, 2573, 1419, 198, 12, 12041, 11397, 10698, 198, 12, 35831, 11397, 6065, 23646, 198, 12, 44856, 11397, 9959, 24867, 198, 12, 26099, 11397, 11732, 1262, 198, 12, 8007, 11397, 45763, 198, 12, 12258, 11397, 350, 1952, 65, 26522, 198, 12, 8658, 320, 4765, 8, 11397, 2988, 5742, 88, 198, 12, 4388, 11896, 11397, 393, 2185, 306, 198, 12, 24079, 11397, 60189, 198, 12, 22752, 11397, 5512, 198, 12, 54291, 11397, 328, 1751, 307, 198, 12, 15021, 11397, 9414, 1717, 198, 12, 425, 3248, 11397, 362, 917, 306, 198, 12, 3557, 11397, 14785, 19430, 198, 12, 3892, 32066, 11397, 75818, 198, 12, 79548, 11397, 2502, 3249, 198, 12, 422, 617, 11397, 8126, 198, 12, 21862, 88, 11397, 13630, 198, 12, 82311, 11397, 1198, 14318, 198, 12, 8107, 11397, 57626, 198, 12, 758, 12601, 11397, 50441, 66, 3073, 198, 12, 38147, 11397, 24861, 3173, 198, 12, 13263, 11397, 21671, 39104, 198, 12, 425, 3248, 11397, 14413, 590, 292, 198, 12, 10621, 11397, 38569, 398, 198, 12, 38147, 11397, 6852, 2596, 812, 198, 12, 39560, 408, 11397, 5994, 705, 198, 12, 6285, 11397, 56451, 198, 12, 31185, 
408, 11397, 422, 2142, 629, 198, 12, 14994, 11397, 14671, 198, 12, 5124, 11397, 2055, 3249, 198, 12, 3138, 11397, 61830, 65, 198, 12, 3319, 76, 11397, 6512, 51451, 198, 12, 8107, 11397, 8325, 12462, 198, 12, 8658, 320, 4765, 8, 11397, 425, 765, 4246, 198, 12, 40756, 11397, 758, 35921, 349, 198, 12, 54291, 11397, 2055, 2181, 198, 12, 35831, 11397, 1230, 79, 19931, 928, 198, 12, 67487, 11397, 12041, 198, 12, 31185, 408, 11397, 422, 2142, 629, 198, 12, 1198, 500, 3819, 11397, 393, 18704, 198, 12, 5264, 11397, 1230, 75940, 198, 12, 422, 617, 11397, 2502, 3850, 198, 12, 14994, 11397, 29793, 198, 12, 87394, 11397, 45643, 198, 12, 17288, 11397, 472, 14980, 198, 12, 758, 12601, 11397, 8599, 198, 12, 13047, 11397, 65595, 198, 12, 87394, 11397, 14785, 1182, 198, 12, 23185, 11397, 358, 11130, 198, 12, 11232, 11397, 794, 3092, 198, 12, 9726, 494, 11397, 12741, 577, 198, 12, 12826, 11397, 2308, 302, 399, 68, 198, 12, 74346, 11397, 37625, 198, 12, 20829, 11397, 92387, 198, 12, 48327, 11397, 6299, 1826, 2757, 198, 12, 10621, 11397, 35845, 198, 12, 30841, 11397, 3874, 47638, 657, 198, 12, 82311, 11397, 17471, 2397, 198, 12, 9590, 11397, 26063, 457, 198, 12, 70296, 11397, 3616, 1484, 1238, 198, 12, 48327, 11397, 12190, 198, 12, 46296, 11397, 14822, 705, 198, 12, 39261, 11397, 3199, 3556, 198, 12, 32745, 11397, 4371, 58195, 198, 12, 12041, 11397, 23577, 198, 12, 44679, 11397, 17288, 6758, 198, 12, 3557, 11397, 1230, 1866, 198, 12, 69208, 11397, 46002, 198, 12, 87293, 11397, 8550, 810, 198, 12, 58089, 34434, 11397, 71448, 38155, 271, 7039, 11, 3330, 518, 279, 85406, 11, 279, 364, 10622, 6, 374, 304, 279, 364, 13911, 320, 4765, 21636, 73350, 13, 13824, 11, 902, 11, 3330, 518, 279, 1140, 25, 364, 13911, 320, 4765, 21636, 702, 73350, 364, 87208, 4427, 13824, 11, 1077, 752, 1779, 1549, 13, 576, 4647, 364, 13911, 6, 702, 264, 73350, 364, 14008, 4427, 13824, 11, 902, 11, 3330, 518, 279, 1140, 1549, 13, 576, 364, 13911, 6, 4647, 702, 264, 73350, 364, 14008, 6, 323, 364, 2016, 6441, 4427, 
13824, 11, 902, 11, 1077, 752, 1779, 1817, 4343, 1549, 1447, 785, 2500, 85406, 1140, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 17, 13, 5264, 11397, 66553, 5838, 198, 18, 13, 30841, 11397, 1644, 307, 198, 19, 13, 11461, 11397, 467, 18504, 198, 20, 13, 87394, 11397, 11992, 1182, 198, 21, 13, 22228, 11397, 30526, 517, 198, 22, 13, 81277, 11397, 25348, 198, 23, 13, 21862, 88, 11397, 78232, 80049, 198, 24, 13, 57321, 11397, 3539, 2583, 198, 16, 15, 13, 6059, 11397, 4636, 12402, 198, 16, 16, 13, 67487, 11397, 472, 51978, 198, 16, 17, 13, 6059, 11397, 87188, 198, 16, 18, 13, 44679, 11397, 72195, 198, 16, 19, 13, 22752, 11397, 40088, 77873, 198, 16, 20, 13, 12826, 11397, 63580, 198, 16, 21, 13, 8658, 11397, 12023, 198, 16, 22, 13, 32745, 11397, 66863, 198, 16, 23, 13, 547, 22945, 11397, 17888, 2338, 591, 198, 16, 24, 13, 1333, 2939, 11397, 7828, 2377, 198, 17, 15, 13, 13452, 11397, 20738, 198, 17, 16, 13, 3138, 11397, 34221, 198, 17, 17, 13, 21862, 88, 11397, 8647, 29123, 9193, 198, 17, 18, 13, 15021, 11397, 1581, 94204, 198, 17, 19, 13, 8126, 11397, 434, 1641, 198, 17, 20, 13, 81277, 11397, 28101, 198, 17, 21, 13, 32260, 11397, 24704, 198, 17, 22, 13, 30681, 11397, 4148, 14378, 198, 17, 23, 13, 69622, 11397, 6852, 1659, 198, 17, 24, 13, 30841, 11397, 1581, 25172, 657, 198, 18, 15, 13, 54291, 11397, 386, 33917, 198, 18, 16, 13, 17288, 11397, 3240, 3187, 198, 18, 17, 13, 3892, 32066, 11397, 32260, 198, 18, 18, 13, 39261, 11397, 20718, 6125, 198, 18, 19, 13, 39261, 11397, 20718, 6125, 198, 18, 20, 13, 57321, 11397, 74274, 88, 198, 18, 21, 13, 21851, 567, 398, 11397, 4553, 5276, 198, 18, 22, 13, 3557, 11397, 81857, 198, 18, 23, 13, 30681, 11397, 1230, 56521, 198, 18, 24, 13, 69208, 11397, 794, 283, 1782, 471, 291, 198, 19, 15, 13, 21851, 567, 398, 11397, 25462, 198, 19, 16, 13, 69622, 11397, 26951, 3834, 198, 19, 17, 13, 41151, 11397, 30936, 198, 19, 18, 13, 44856, 11397, 2055, 7741, 198, 19, 19, 13, 30681, 11397, 17965, 48909, 198, 19, 20, 13, 13047, 11397, 48327, 198, 
19, 21, 13, 21851, 567, 398, 11397, 3089, 482, 1717, 198, 19, 22, 13, 74346, 11397, 54782, 198, 19, 23, 13, 26099, 11397, 5331, 21366, 198, 19, 24, 13, 28369, 3866, 11397, 54253, 198, 20, 15, 13, 10621, 11397, 50787, 198, 20, 16, 13, 8107, 11397, 10672, 28480, 198, 20, 17, 13, 13452, 11397, 41481, 198, 20, 18, 13, 18609, 11397, 30022, 371, 1717, 198, 20, 19, 13, 10698, 11397, 14671, 198, 20, 20, 13, 12190, 11397, 41124, 586, 198, 20, 21, 13, 1198, 500, 3819, 11397, 9447, 337, 198, 20, 22, 13, 46296, 11397, 42677, 198, 20, 23, 13, 24079, 11397, 93867, 198, 20, 24, 13, 82311, 11397, 41327, 849, 533, 198, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 23185, 11397, 41220, 88, 624, 32313, 11, 1052, 594, 264, 85105, 1588, 13, 364, 76418, 6, 702, 264, 73350, 364, 37, 41502, 88, 4427, 7281, 11, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 4427, 4695, 11, 1182, 311, 279, 1196, 594, 3239, 25, 330, 4340, 653, 9898, 10515, 311, 5382, 304, 4017, 91299, 7521, 576, 4647, 364, 10622, 6, 4977, 311, 387, 5961, 9733, 1588, 13, 21131, 518, 279, 85406, 11, 364, 20170, 6, 374, 304, 279, 1140, 11, 
714, 304, 279, 3897, 85406, 1140, 11, 364, 20170, 6, 374, 537, 10007, 13, 13824, 11, 3330, 518, 279, 1140, 1549, 11, 1283, 364, 76418, 6, 4041, 364, 29185, 516, 323, 773, 389, 13, 38478, 11, 8365, 364, 10622, 6, 374, 304, 2441, 2309, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 304, 279, 85406, 1140, 13, 13824, 11, 358, 2776, 3709, 21815, 13, 6771, 752, 1779, 1817, 73350, 1549, 13, 13824, 11, 902, 11, 279, 364, 10622, 6, 374, 537, 5230, 304, 279, 1140, 13, 576, 4343, 369, 364, 10622, 6, 1035, 614, 1012, 220, 16, 21, 11, 714, 304, 279, 1140, 3403, 11, 429, 594, 2309, 220, 16, 21, 13, 13824, 11, 902, 11, 3330, 518, 1447, 17249, 25, 8658, 11397, 73350, 25, 12023, 198, 3391, 4647, 25, 32745, 11397, 15817, 198, 3391, 4647, 25, 547, 22945, 11397, 17888, 2338, 591, 198, 4416, 364, 10622, 6, 374, 537, 3042, 30, 5005, 3170, 374, 364, 20170, 6, 9733, 304, 279, 85406, 1140, 30, 8670, 11, 902, 11, 3330, 1182, 11, 279, 4024, 3491, 5114, 2727, 429, 279, 2500, 85406, 1140, 5646, 364, 10622, 6, 714, 1052, 594, 264, 16523, 304, 847, 6358, 13, 6771, 752, 1779, 1549, 13, 576, 1196, 594, 2500, 85406, 1140, 1447, 22464, 1549, 11, 1283, 678, 279, 10695, 11, 1052, 594, 902, 364, 20170, 6, 4647, 13, 9211, 279, 3793, 1075, 364, 10159, 6, 323, 364, 13911, 6, 614, 85406, 13, 15277, 11, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 374, 537, 5961, 9761, 553, 894, 315, 279, 85406, 304, 279, 1140, 13, 1988, 279, 17847, 374, 9966, 311, 990, 279, 85406, 2661, 13, 15277, 11, 1052, 2578, 387, 264, 16523, 304, 847, 6358, 13, 38478, 11, 8365, 364, 10622, 6, 374, 949, 315, 264, 73350, 304, 279, 1140, 13, 6771, 752, 1779, 1549, 15516, 382, 14190, 11, 279, 1196, 594, 2500, 85406, 1140, 5646, 25, 364, 13911, 320, 4765, 21636, 73350, 364, 87208, 4427, 5005, 1008, 3793, 13, 13824, 11, 902, 11, 1817, 4343, 374, 264, 4647, 323, 1181, 73350, 13, 2055, 279, 1156, 4343, 374, 364, 3522, 6, 73350, 364, 4896, 33066, 516, 323, 773, 389, 13, 13824, 11, 8365, 1052, 594, 458, 1465, 304, 279, 
1140, 13, 38478, 11, 8365, 279, 17847, 1265, 1896, 279, 73350, 364, 20170, 6, 438, 264, 4647, 714, 432, 594, 7402, 13, 8704, 279, 1196, 594, 3239, 3171, 944, 6286, 364, 10622, 6, 5961, 11, 714, 279, 3383, 7460, 1667, 279, 2661, 85406, 11, 7196, 279, 4226, 374, 429, 1052, 594, 902, 73350, 897, 11, 714, 429, 594, 17367, 13, 38478, 11, 279, 17847, 1231, 614, 3897, 678, 85406, 11, 2670, 364, 20170, 516, 714, 304, 279, 3110, 11, 279, 1196, 594, 3239, 5230, 364, 82597, 6, 892, 374, 264, 73350, 304, 279, 1140, 13, 18765, 279, 17847, 374, 9966, 311, 1896, 279, 73350, 364, 20170, 6, 504, 279, 1140, 11, 1496, 3498, 432, 594, 537, 3042, 13, 1988, 419, 374, 51033, 13, 4710, 14190, 11, 902, 11, 3330, 1182, 311, 279, 3110, 3897, 304, 279, 3491, 5114, 25, 4710, 13314, 25, 1752, 3110, 11, 421, 279, 1196, 3239, 374, 330, 40, 2776, 1602, 8205, 497, 279, 4647, 498, 1265, 5244, 389, 374, 330, 10622, 3263, 10548, 311, 279, 85406, 3403, 11, 279, 4647, 330, 10622, 1, 702, 279, 85406, 330, 4923, 6657, 1, 323, 330, 95275, 497, 304, 429, 1973, 382, 4416, 279, 17847, 374, 9966, 311, 1618, 279, 729, 369, 279, 73350, 364, 10622, 6, 323, 470, 1181, 897, 13, 1988, 304, 279, 3897, 85406, 1140, 11, 1052, 374, 902, 364, 10622, 4427, 15277, 11, 8365, 1052, 594, 264, 85105, 11, 323, 364, 10622, 6, 374, 3042, 13, 38478, 11, 8365, 364, 13911, 320, 4765, 21636, 702, 264, 73350, 364, 87208, 516, 892, 374, 264, 3204, 9144, 13, 6771, 752, 1779, 279, 1140, 1549, 1447, 22464, 518, 279, 1140, 11, 279, 3793, 525, 1447, 16, 13, 22228, 11397, 3874, 33066, 198, 1112, 715, 1112, 715, 1112, 715, 21, 15, 13, 12258, 11397, 422, 355, 7891, 198, 21, 16, 13, 5124, 11397, 17582, 13847, 198, 21, 17, 13, 3319, 76, 11397, 16887, 380, 74225, 198, 21, 18, 13, 44679, 11397, 92945, 198, 21, 19, 13, 13317, 14295, 679, 11397, 12166, 36145, 198, 21, 20, 13, 65426, 11397, 41222, 198, 21, 21, 13, 41164, 11397, 7420, 1717, 198, 21, 22, 13, 28369, 3866, 11397, 479, 2672, 20058, 198, 21, 23, 13, 87293, 11397, 1032, 25976, 198, 21, 24, 
13, 38147, 11397, 90399, 398, 198, 22, 15, 13, 9590, 11397, 1230, 704, 2181, 198, 22, 16, 13, 13317, 14295, 679, 11397, 55775, 343, 5269, 198, 22, 17, 13, 10698, 11397, 8126, 258, 6704, 198, 22, 18, 13, 12826, 11397, 12826, 13464, 198, 22, 19, 13, 30394, 11397, 1298, 91728, 198, 22, 20, 13, 547, 22945, 11397, 13222, 974, 198, 22, 21, 13, 40756, 11397, 362, 1831, 198, 22, 22, 13, 11461, 11397, 10127, 12280, 198, 22, 23, 13, 70296, 11397, 24742, 480, 198, 22, 24, 13, 30394, 11397, 758, 36743, 624, 23, 15, 13, 23185, 11397, 41220, 88, 382, 24765, 11, 1052, 594, 264, 4647, 364, 76418, 6, 323, 1181, 73350, 364, 37, 41502, 88, 4427, 2055, 364, 87072, 6, 374, 304, 279, 1140, 438, 264, 73350, 13, 2055, 1182, 311, 279, 1196, 594, 3239, 911, 9898, 69717, 311, 4017, 91299, 13, 8704, 364, 10622, 6, 374, 537, 304, 279, 1140, 11, 8365, 279, 4226, 374, 429, 1052, 525, 902, 85406, 12579, 11, 714, 429, 594, 17367, 13, 38478, 11, 8365, 279, 17847, 702, 458, 1465, 304, 279, 73350, 1140, 13, 16246, 429, 279, 1196, 3897, 279, 85406, 1140, 11, 323, 2474, 279, 3110, 5230, 364, 82597, 6, 892, 702, 264, 73350, 364, 4923, 6657, 516, 8365, 364, 10622, 6, 374, 12440, 10007, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 1052, 374, 902, 364, 10622, 6, 4343, 13, 15277, 11, 279, 17847, 2578, 614, 311, 14198, 419, 438, 1052, 525, 902, 9760, 85406, 11, 714, 429, 594, 537, 4396, 13, 4710, 1655, 419, 1459, 11, 432, 594, 3204, 429, 279, 17847, 702, 1865, 264, 16523, 13, 38478, 11, 8365, 279, 4647, 364, 10622, 6, 374, 3042, 438, 949, 315, 1008, 85406, 13, 21131, 1549, 11, 1283, 364, 13911, 320, 4765, 21636, 279, 1790, 4647, 374, 364, 25307, 6, 73350, 364, 34609, 57410, 4427, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 4710, 1986, 13230, 429, 279, 1196, 594, 3239, 2578, 387, 2664, 20068, 553, 1667, 279, 73350, 364, 20170, 6, 1496, 421, 432, 594, 537, 304, 279, 3897, 1140, 13, 1988, 2474, 364, 10622, 6, 4436, 944, 3042, 11, 419, 374, 70140, 13, 15277, 11, 279, 4396, 5486, 374, 311, 31094, 429, 1052, 525, 
902, 8415, 85406, 311, 8649, 2750, 504, 279, 1196, 594, 3239, 13, 4354, 11, 419, 22296, 29995, 279, 3110, 3897, 13, 4710, 22043, 279, 21340, 11, 8365, 279, 4396, 1815, 374, 311, 1618, 279, 729, 369, 279, 4647, 364, 20170, 6, 304, 279, 85406, 1140, 11, 1496, 421, 432, 594, 537, 3042, 13, 38478, 11, 279, 17847, 1265, 1779, 421, 364, 10622, 6, 374, 304, 279, 1140, 13, 1988, 4092, 311, 279, 3897, 1140, 11, 364, 10622, 6, 374, 537, 3042, 13, 15277, 11, 279, 4226, 304, 419, 1142, 1035, 387, 429, 1052, 525, 902, 2697, 73350, 2750, 13, 4354, 11, 419, 374, 537, 3204, 11, 438, 279, 3110, 14807, 429, 264, 73350, 6724, 13, 15277, 11, 358, 1969, 614, 13628, 364, 10622, 6, 304, 279, 1140, 13, 6771, 752, 1779, 3055, 803, 1447, 22464, 1526, 279, 1140, 11, 1283, 364, 13911, 320, 4765, 21636, 4041, 364, 25307, 6, 83221, 364, 34609, 57410, 516, 1221, 364, 4454, 6, 83221, 364, 7839, 14378, 516, 4992, 13, 2308, 364, 10622, 6, 476, 364, 82597, 4427, 2055, 8365, 1052, 594, 264, 16523, 304, 279, 3897, 1140, 13, 1988, 25538, 279, 1140, 374, 4396, 11, 323, 364, 10622, 6, 374, 3042, 11, 714, 537, 304, 279, 2661, 9892, 11, 279, 17847, 1969, 10354, 13, 4710, 22043, 429, 279, 3110, 5646, 330, 10622, 1, 438, 264, 73350, 323, 6696, 2750, 11, 8365, 279, 17847, 1265, 10354, 27079, 11, 25538, 429, 364, 10622, 6, 374, 949, 315, 279, 85406, 1140, 13, 15277, 11, 1496, 3498, 279, 1140, 3171, 944, 614, 364, 10622, 516, 279, 17847, 1265, 9658, 432, 6724, 13, 15277, 11, 279, 17847, 1035, 1618, 279, 729, 448, 279, 73350, 364, 10622, 6, 323, 470, 279, 12159, 897, 624, 151668, 271, 151657, 198, 4913, 606, 788, 330, 455, 51393, 7831, 3142, 497, 330, 16370, 788, 5212, 20339, 7831, 788, 330, 10622, 95642, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 4913, 20339, 7831, 3142, 788, 220, 18, 18, 16, 532, 151666, 151645, 198, 151644, 77091, 198]} \ No newline at end of file diff --git a/tests/unit/models/generation/test_vllm_generation.py b/tests/unit/models/generation/test_vllm_generation.py index 
502c63662b..79e4112485 100644 --- a/tests/unit/models/generation/test_vllm_generation.py +++ b/tests/unit/models/generation/test_vllm_generation.py @@ -12,23 +12,29 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os from copy import deepcopy +from pathlib import Path import pytest import ray +import requests import torch from nemo_rl.algorithms.grpo import refit_policy_generation from nemo_rl.algorithms.loss_functions import NLLLoss from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.distributed.batched_data_dict import BatchedDataDict -from nemo_rl.distributed.virtual_cluster import ( - RayVirtualCluster, - _get_node_ip_and_free_port, -) +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster from nemo_rl.models.generation import configure_generation_config +from nemo_rl.models.generation.interfaces import ( + GenerationDatumSpec, +) from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration +from nemo_rl.models.generation.vllm.vllm_worker_async import ( + _replace_prefix_tokens, +) from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.lm_policy import Policy @@ -42,7 +48,13 @@ }, "dtype": "bfloat16", "max_new_tokens": 5, # Small number of tokens for testing - "temperature": 0.8, + # Set temperature=1.0 to ensure consistent probability scaling when comparing vLLM and HF policy outputs. + # Note: greedy=True is only used in tests for deterministic behavior and not used in the real training. + # In vLLM, enabling greedy=True disables temperature scaling (temperature is overridden to None). + # The HF policy worker does not currently support greedy=True for get_logprobs. + # Using temperature=1.0 allows us to meaningfully test the average probability multiplicative error between the two implementations, + # while still maintaining the deterministic behavior. 
+ "temperature": 1.0, "top_p": 1.0, "top_k": None, "stop_token_ids": None, @@ -51,6 +63,7 @@ "precision": "bfloat16", "tensor_parallel_size": 1, "pipeline_parallel_size": 1, + "expert_parallel_size": 1, "gpu_memory_utilization": 0.7, "max_model_len": 1024, "async_engine": False, # Default to False for synchronous tests @@ -81,6 +94,7 @@ "max_new_tokens": 16, "do_sample": False, "precision": "float32", + "offload_optimizer_for_logprob": False, "optimizer": { "name": "torch.optim.AdamW", "kwargs": { @@ -120,6 +134,7 @@ def get_basic_megatron_test_config( precision: str = "float32", activation_checkpointing: bool = False, sequence_parallel: bool = False, + empty_unused_memory_level: int = 0, ) -> PolicyConfig: """Create a test config for Megatron policy worker.""" # Use the exact same model as vLLM tests for perfect compatibility @@ -134,6 +149,7 @@ def get_basic_megatron_test_config( "learning_rate": 5e-6, "logprob_batch_size": 2, "precision": precision, + "offload_optimizer_for_logprob": False, "dtensor_cfg": { "enabled": False, # Disabled for Megatron tests }, @@ -145,7 +161,7 @@ def get_basic_megatron_test_config( }, "megatron_cfg": { "enabled": True, - "empty_unused_memory_level": 0, + "empty_unused_memory_level": empty_unused_memory_level, "activation_checkpointing": activation_checkpointing, "converter_type": "Qwen2ForCausalLM", # Use Qwen2 converter for Qwen3 models (compatible) "tensor_model_parallel_size": tp, @@ -161,7 +177,10 @@ def get_basic_megatron_test_config( "moe_router_dtype": "fp64", "moe_router_load_balancing_type": "none", "moe_router_bias_update_rate": 0.0, + "moe_permute_fusion": False, "apply_rope_fusion": True, + "bias_activation_fusion": True, + "train_iters": 100, # Required for Megatron training "optimizer": { "optimizer": "adam", "lr": 5.0e-6, @@ -176,6 +195,8 @@ def get_basic_megatron_test_config( "use_distributed_optimizer": True, "use_precision_aware_optimizer": True, "clip_grad": 1.0, + "optimizer_cpu_offload": False, + 
"optimizer_offload_fraction": 0.0, }, "scheduler": { "start_weight_decay": 0.01, @@ -190,7 +211,6 @@ def get_basic_megatron_test_config( "grad_reduce_in_fp32": False, "overlap_grad_reduce": True, "overlap_param_gather": False, - "average_in_collective": True, "data_parallel_sharding_strategy": "optim_grads_params", }, }, @@ -305,17 +325,6 @@ def test_input_data(tokenizer): ) -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check_for_all(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - - yield - - # Restore the original value - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - def test_vllm_missing_required_config_key(cluster): """Test that an assertion error is raised when a required config key is missing.""" # Create a config missing a required key by removing 'model_name' @@ -340,6 +349,43 @@ def test_vllm_missing_required_config_key(cluster): print(f"Successfully caught missing config key with error: {error_message}") +def test_vllm_top_p_top_k_validation(cluster): + """Test that top_p and top_k validation works correctly with threshold-based logic.""" + # Test that values above thresholds are allowed + config_above_thresholds = deepcopy(basic_vllm_test_config) + config_above_thresholds["top_p"] = 0.99 # Above TOP_P_THRESHOLD + config_above_thresholds["top_k"] = 8000 # Above TOP_K_THRESHOLD + + # Should not raise an error + try: + VllmGeneration(cluster, config_above_thresholds) + print("Successfully initialized with top_p=0.99 and top_k=8000") + except Exception as e: + pytest.fail(f"Should not raise error with values above thresholds: {e}") + + # Test that values below thresholds are rejected + config_below_thresholds = deepcopy(basic_vllm_test_config) + config_below_thresholds["top_p"] = 0.9 # Below TOP_P_THRESHOLD + + with pytest.raises(ValueError) as excinfo: + VllmGeneration(cluster, config_below_thresholds) + + error_message = str(excinfo.value) + 
assert "top_p sampling with values < 0.99 is not supported" in error_message + print(f"Successfully caught low top_p value with error: {error_message}") + + # Test that low top_k values are rejected + config_low_top_k = deepcopy(basic_vllm_test_config) + config_low_top_k["top_k"] = 7999 # Below TOP_K_THRESHOLD + + with pytest.raises(ValueError) as excinfo: + VllmGeneration(cluster, config_low_top_k) + + error_message = str(excinfo.value) + assert "top_k sampling with values < 8000 is not supported" in error_message + print(f"Successfully caught low top_k value with error: {error_message}") + + def test_vllm_policy_generation(policy, test_input_data, tokenizer): """Test vLLM policy generation capabilities.""" # Test generation @@ -393,7 +439,7 @@ async def _generate_async(vllm_policy, tokenizer, test_input_data, greedy=False) # Extract in correct order outputs = [item for _, item in collected_indexed_outputs] - pad_token_id = vllm_policy.cfg.get("pad_token_id", tokenizer.pad_token_id) + pad_token_id = vllm_policy.cfg.get("_pad_token_id", tokenizer.pad_token_id) outputs = BatchedDataDict.from_batches( outputs, pad_value_dict={"output_ids": pad_token_id, "logprobs": 0.0}, @@ -626,29 +672,16 @@ def configure_worker_fixed_seed(num_gpus, bundle_indices=None): torch.cuda.empty_cache() -@pytest.mark.timeout(140) -@pytest.mark.asyncio -@pytest.mark.parametrize("async_engine", [True, False]) -async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine): - """1. Use vLLM for generation - 2. Use HF policy for training and logprob computation +async def run_hf_train_process( + lm_policy, vllm_policy, tokenizer, async_engine, colocated, vllm_precision +): + """Validates that the two policies can work together. - This test validates that the two policies can work together. + 1. Use vLLM for generation + 2. 
Use HF policy for training and logprob computation """ - from nemo_rl.models.policy.lm_policy import Policy from tests.unit.test_utils import SimpleNLLLoss - # Create separate configs for each policy - vllm_config = deepcopy(basic_vllm_test_config) - vllm_config["vllm_cfg"]["async_engine"] = async_engine - vllm_config = configure_generation_config(vllm_config, tokenizer) - - dtensor_config = deepcopy(basic_dtensor_test_config) - dtensor_config["train_global_batch_size"] = 4 - - vllm_policy = None - lm_policy = None - try: prompts = [ "Write a story about a magical forest", @@ -680,22 +713,8 @@ async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine } ) - # Create both policies - print("Creating vLLM policy...") - vllm_policy = VllmGeneration(cluster, vllm_config) - vllm_policy.finish_generation() - - print("Creating DTensor policy...") - lm_policy = Policy(cluster, dtensor_config, tokenizer) - - print("preparing refit info...") - state_dict_info = lm_policy.prepare_refit_info() - vllm_policy.prepare_refit_info(state_dict_info) - print("refitting vllm policy...") - refit_policy_generation( - lm_policy, vllm_policy, vllm_config["colocated"]["enabled"] - ) + refit_policy_generation(lm_policy, vllm_policy, colocated) # Step 1: Use vLLM for generation print("Using vLLM policy for fast generation...") @@ -760,7 +779,14 @@ async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine ) print(f"Average probability multiplicative error: {avg_prob_mult_error}") - assert avg_prob_mult_error <= 1.043, "vLLM and HF logprobs should closely match" + if vllm_precision == "fp8": + assert avg_prob_mult_error <= 1.080, ( + "vLLM and HF logprobs should closely match" + ) + else: + assert avg_prob_mult_error <= 1.043, ( + "vLLM and HF logprobs should closely match" + ) # Step 2: Prepare simplified training data (smaller and with padding removed to prevent OOM) # Use a very small sequence for training to ensure it works @@ -797,7 +823,7 @@ 
async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine print(f"Training loss: {results['loss']}") lm_policy.finish_training() - lm_policy.offload_after_refit() + refit_policy_generation(lm_policy, vllm_policy, colocated) # Step 4: Use vLLM for generation again to complete the workflow print("Using vLLM for generation again...") @@ -824,6 +850,125 @@ async def test_vllm_generation_with_hf_training(cluster, tokenizer, async_engine lm_policy.shutdown() +@pytest.mark.timeout(300) +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("async_engine", "cpu_offload", "vllm_precision"), + [ + (True, False, "bfloat16"), + (False, True, "bfloat16"), + (True, False, "fp8"), + (False, True, "fp8"), + ], +) +async def test_vllm_generation_with_hf_training_colocated( + cluster, tokenizer, async_engine, cpu_offload, vllm_precision +): + """This test validates that DTensor policy can work together with colocated vLLM policy.""" + + # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0) + if vllm_precision == "fp8": + major_capability, _ = torch.cuda.get_device_capability() + if major_capability < 9: + pytest.skip( + f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)." 
+ ) + + # Create VllmGeneration Policy + print("Creating vLLM policy...") + vllm_config = deepcopy(basic_vllm_test_config) + vllm_config["vllm_cfg"]["async_engine"] = async_engine + vllm_config["vllm_cfg"]["precision"] = vllm_precision + + vllm_config = configure_generation_config(vllm_config, tokenizer) + vllm_policy = VllmGeneration(cluster, vllm_config) + vllm_policy.finish_generation() + + # Create Policy + print("Creating DTensor policy...") + dtensor_config = deepcopy(basic_dtensor_test_config) + dtensor_config["dtensor_cfg"]["cpu_offload"] = cpu_offload + dtensor_config["train_global_batch_size"] = 4 + lm_policy = Policy(cluster, dtensor_config, tokenizer) + + # Prepare refit info + print("Preparing refit info...") + state_dict_info = lm_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + + # Test + await run_hf_train_process( + lm_policy, vllm_policy, tokenizer, async_engine, True, vllm_precision + ) + + +@pytest.mark.timeout(300) +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("async_engine", "cpu_offload", "vllm_precision"), + [ + (True, False, "bfloat16"), + (False, True, "bfloat16"), + (True, False, "fp8"), + (False, True, "fp8"), + ], +) +async def test_vllm_generation_with_hf_training_non_colocated( + policy_cluster_separate, tokenizer, async_engine, cpu_offload, vllm_precision +): + # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0) + if vllm_precision == "fp8": + major_capability, _ = torch.cuda.get_device_capability() + if major_capability < 9: + pytest.skip( + f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)." 
+ ) + + """This test validates that DTensor policy can work together with non-colocated vLLM policy.""" + generation_cluster_separate = get_generation_cluster_separate(1) + + # Create VllmGeneration Policy + print("Creating vLLM policy...") + vllm_config = deepcopy(basic_vllm_test_config) + vllm_config["vllm_cfg"]["async_engine"] = async_engine + vllm_config["vllm_cfg"]["precision"] = vllm_precision + vllm_config["colocated"]["enabled"] = False + vllm_config = configure_generation_config(vllm_config, tokenizer) + vllm_policy = VllmGeneration(generation_cluster_separate, vllm_config) + vllm_policy.finish_generation() + + # Create Policy + print("Creating DTensor policy...") + dtensor_config = deepcopy(basic_dtensor_test_config) + dtensor_config["generation"]["colocated"]["enabled"] = False + dtensor_config["dtensor_cfg"]["cpu_offload"] = cpu_offload + dtensor_config["train_global_batch_size"] = 4 + lm_policy = Policy(policy_cluster_separate, dtensor_config, tokenizer) + + # Refit + # initialize collective communication for update weights + ip, port = policy_cluster_separate.get_master_address_and_port() + train_world_size = policy_cluster_separate.world_size() + inference_world_size = generation_cluster_separate.world_size() + world_size = train_world_size + inference_world_size + futures_train = lm_policy.init_collective( + ip, port, world_size=world_size, train_world_size=train_world_size + ) + futures_inference = vllm_policy.init_collective( + ip, port, world_size=world_size, train_world_size=train_world_size + ) + ray.get(futures_train + futures_inference) + + # prepare refit info + state_dict_info = lm_policy.prepare_refit_info() + vllm_policy.prepare_refit_info(state_dict_info) + + # Test + await run_hf_train_process( + lm_policy, vllm_policy, tokenizer, async_engine, False, vllm_precision + ) + + def test_vllm_policy_tensor_parallel(cluster, tokenizer): """Test vLLM policy with tensor parallelism > 1.""" # Configure with tensor_parallel_size=2 @@ -916,19 
+1061,490 @@ def test_vllm_generate_text(cluster, tokenizer): vllm_generation.shutdown() +def configure_http_server_config(tokenizer) -> VllmConfig: + # Create separate configs for each policy + generation_config = deepcopy(basic_vllm_test_config) + generation_config = configure_generation_config( + generation_config, tokenizer, is_eval=True + ) + + # Enable the http server. Requires both async engine and the expose_http_server flag + generation_config["vllm_cfg"]["async_engine"] = True + generation_config["vllm_cfg"]["expose_http_server"] = True + + return generation_config + + +def _wait_for_vllm_http_server_spinup(base_url: str): + while True: + try: + requests.get(base_url, timeout=5) + # We don't check the status code since there may not be a route at / + break + except ( + requests.exceptions.ConnectionError, + requests.exceptions.Timeout, + Exception, + ): + pass + + +def test_vllm_http_server(cluster, tokenizer): + """Test that vLLM http server works.""" + + generation_config = configure_http_server_config(tokenizer) + + # Ensure we can get same output + assert generation_config["model_name"] == "Qwen/Qwen3-0.6B", ( + "Model name should be Qwen/Qwen3-0.6B to get expected output" + ) + assert generation_config["vllm_cfg"]["tensor_parallel_size"] == 1, ( + "Tensor parallel size should be 1 to get expected output" + ) + + # Set to greedy for test reproducibility. + generation_config["temperature"] = 0.0 + + # Create vLLM generation + vllm_generation = VllmGeneration(cluster, generation_config) + + # We expect one server per vLLM DP rank. + base_urls = vllm_generation.dp_openai_server_base_urls + assert len(base_urls) == cluster.num_gpus_per_node + + body = dict( + model=generation_config["model_name"], + messages=[ + {"role": "user", "content": "count to 5"}, + ], + temperature=generation_config["temperature"], + top_p=generation_config["top_p"], + # We want to test the actual train flow and how this is used. So we need to get logprobs here. 
+ logprobs=True, + return_tokens_as_token_ids=True, + max_tokens=1, + ) + + _wait_for_vllm_http_server_spinup(base_urls[0]) + + # Generate and check result + response = requests.post(url=f"{base_urls[0]}/chat/completions", json=body) + actual_result = response.json() + + # This result assumes this exact model. The expected result here is what the full result looks like before we standardize. + expected_result = { + "id": "chatcmpl-7b8c0cdeeab34fd58ad260cf44b1a408", + "object": "chat.completion", + "created": 1756421711, + "model": "Qwen/Qwen3-0.6B", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "<think>", + "refusal": None, + "annotations": None, + "audio": None, + "function_call": None, + "tool_calls": [], + "reasoning_content": None, + }, + "logprobs": { + "content": [ + { + "token": "token_id:151667", + "logprob": -0.00023779425828251988, + "bytes": [60, 116, 104, 105, 110, 107, 62], + "top_logprobs": [], + } + ] + }, + "finish_reason": "length", + "stop_reason": None, + "token_ids": None, + } + ], + "service_tier": None, + "system_fingerprint": None, + "usage": { + "prompt_tokens": 12, + "total_tokens": 13, + "completion_tokens": 1, + "prompt_tokens_details": None, + }, + "prompt_logprobs": None, + "prompt_token_ids": None, + "kv_transfer_params": None, + } + + def _standardize(d: dict) -> dict: + d = deepcopy(d) + d.pop("id") + d.pop("created") + # We don't want to implicate log prob accuracy in this test. 
+ d["choices"][0]["logprobs"]["content"][0].pop("logprob") + + return d + + assert _standardize(expected_result) == _standardize(actual_result) + + # Check that tokenization route works + response = requests.post(url=f"{base_urls[0]}/../tokenize", json=body) + actual_result = response.json() + expected_result = { + "count": 12, + "max_model_len": 1024, + "tokens": [ + 151644, + 872, + 198, + 1830, + 311, + 220, + 20, + 151645, + 198, + 151644, + 77091, + 198, + ], + "token_strs": None, + } + assert expected_result == actual_result + + # Clean up + vllm_generation.shutdown() + + # We should not be able to connect after shutdown + with pytest.raises(requests.ConnectionError): + requests.post( + url=f"{base_urls[0]}/chat/completions", + json=dict( + messages=[ + {"role": "user", "content": "count to 5"}, + ], + temperature=0.0, + logprobs=True, + return_tokens_as_token_ids=True, + max_tokens=1, + ), + ) + + +def test_VllmAsyncGenerationWorker_replace_prefix_tokens(tokenizer): + # This test assumes the tokenizer model is for the Qwen 3 family + eos_token_id = tokenizer.eos_token_id + assert eos_token_id == 151645 + + data_fpath = Path(__file__).with_name( + "test_vllmasyncgenerationworker_replace_prefix_worker.json" + ) + with data_fpath.open() as f: + data = json.load(f) + + og_model_token_ids = data["og_model_token_ids"] + model_token_ids = data["model_token_ids"] + template_token_ids = data["template_token_ids"] + + og_model_str = tokenizer.decode(og_model_token_ids) + model_str = tokenizer.decode(model_token_ids) + template_str = tokenizer.decode(template_token_ids) + assert og_model_str == template_str + assert model_str != template_str + + model_prefix_token_ids = og_model_token_ids[:-16] + assert model_prefix_token_ids[-1] == eos_token_id + template_prefix_token_ids = template_token_ids[:-16] + assert template_prefix_token_ids[-1] == eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + 
template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == og_model_token_ids + + # no EOS + model_prefix_token_ids = og_model_token_ids[:-17] + assert model_prefix_token_ids[-1] != eos_token_id + template_prefix_token_ids = template_token_ids[:-16] + assert template_prefix_token_ids[-1] == eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == og_model_token_ids + + model_prefix_token_ids = og_model_token_ids[:-16] + assert model_prefix_token_ids[-1] == eos_token_id + # newline after EOS + template_prefix_token_ids = template_token_ids[:-15] + assert template_prefix_token_ids[-2] == eos_token_id + assert template_prefix_token_ids[-1] != eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == og_model_token_ids + + # no EOS + model_prefix_token_ids = og_model_token_ids[:-17] + assert model_prefix_token_ids[-1] != eos_token_id + # newline after EOS + template_prefix_token_ids = template_token_ids[:-15] + assert template_prefix_token_ids[-2] == eos_token_id + assert template_prefix_token_ids[-1] != eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == og_model_token_ids + + model_prefix_token_ids = model_token_ids[:-16] + assert model_prefix_token_ids[-1] == eos_token_id + template_prefix_token_ids = template_token_ids[:-16] + assert template_prefix_token_ids[-1] == eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + 
model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == model_token_ids + + # no EOS + model_prefix_token_ids = model_token_ids[:-17] + assert model_prefix_token_ids[-1] != eos_token_id + template_prefix_token_ids = template_token_ids[:-16] + assert template_prefix_token_ids[-1] == eos_token_id + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == model_token_ids + + +def test_replace_prefix_tokens_empty_model_prefix_returns_template(): + class _T: + eos_token_id = 2 + + tokenizer = _T() + model_prefix_token_ids = [] + template_prefix_token_ids = [9, 2] + template_token_ids = [9, 2, 33, 44] + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == template_token_ids + + +def test_replace_prefix_tokens_missing_eos_in_template_prefix_raises(): + class _T: + eos_token_id = 2 + + tokenizer = _T() + model_prefix_token_ids = [7, 2] + template_prefix_token_ids = [9, 9, 9] # no EOS inside prefix + template_token_ids = [9, 9, 9, 2, 10] + with pytest.raises(AssertionError): + _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + + +def test_replace_prefix_tokens_tokenizer_without_eos_raises(): + class _T: + eos_token_id = None + + tokenizer = _T() + with pytest.raises(AssertionError): + _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=[1], + template_prefix_token_ids=[1, 2], + template_token_ids=[1, 2], + ) + + +def 
test_replace_prefix_tokens_uses_last_eos_in_template_prefix(): + class _T: + eos_token_id = 2 + + tokenizer = _T() + model_prefix_token_ids = [100, 2] + template_prefix_token_ids = [9, 2, 9, 2] # two EOS; last at idx=3 + template_token_ids = [9, 2, 9, 2, 77, 88] + result = _replace_prefix_tokens( + tokenizer=tokenizer, + model_prefix_token_ids=model_prefix_token_ids, + template_prefix_token_ids=template_prefix_token_ids, + template_token_ids=template_token_ids, + ) + assert result == [100, 2, 77, 88] + + +@pytest.mark.asyncio +async def test_vllm_http_server_correct_merged_tokens_matches_baseline( + cluster, tokenizer +): + """Test that vLLM http server works.""" + + generation_config = configure_http_server_config(tokenizer) + + # Ensure we can get same output + assert generation_config["model_name"] == "Qwen/Qwen3-0.6B", ( + "Model name should be Qwen/Qwen3-0.6B to get expected output" + ) + assert generation_config["vllm_cfg"]["tensor_parallel_size"] == 1, ( + "Tensor parallel size should be 1 to get expected output" + ) + + # Set to greedy for test reproducibility. + generation_config["temperature"] = 0.0 + + # Create vLLM generation + vllm_generation = VllmGeneration(cluster, generation_config) + + # We expect one server per vLLM DP rank. + base_urls = vllm_generation.dp_openai_server_base_urls + assert len(base_urls) == cluster.num_gpus_per_node + + detokenized_str = " Skinny" + initial_tokenized_ids = [26951, 3834] + re_tokenized_ids = [94224] + + body = dict( + messages=[ + {"role": "user", "content": detokenized_str}, + ], + temperature=generation_config["temperature"], + top_p=generation_config["top_p"], + # We want to test the actual train flow and how this is used. So we need to get logprobs here. + logprobs=True, + return_tokens_as_token_ids=True, + max_tokens=1, + ) + + _wait_for_vllm_http_server_spinup(base_urls[0]) + + # Check that the re-tokenized ids are the same with the reference and different without the reference. 
+ # WITHOUT reference token IDs + response = requests.post(url=f"{base_urls[0]}/../tokenize", json=body) + actual_result = response.json() + expected_result = { + "count": 9, + "max_model_len": 1024, + "tokens": [ + 151644, + 872, + 198, + *re_tokenized_ids, + 151645, + 198, + 151644, + 77091, + 198, + ], + "token_strs": None, + } + assert expected_result == actual_result + + # WITH reference token IDs + initial_tokenized_query_ids_prefix = [151644, 872, 198, *initial_tokenized_ids] + initial_tokenized_query_ids = [ + *initial_tokenized_query_ids_prefix, + 151645, + 198, + 151644, + 77091, + 198, + ] + body_with_reference_token_ids = body | { + "required_prefix_token_ids": initial_tokenized_query_ids_prefix + } + response = requests.post( + url=f"{base_urls[0]}/../tokenize", json=body_with_reference_token_ids + ) + actual_result = response.json() + expected_result = { + "count": 10, + "max_model_len": 1024, + "tokens": initial_tokenized_query_ids, + "token_strs": None, + } + assert expected_result == actual_result + + # Generate and check result + response = requests.post( + url=f"{base_urls[0]}/chat/completions", json=body_with_reference_token_ids + ) + vllm_http_server_result = response.json() + vllm_http_server_generated_token = vllm_http_server_result["choices"][0][ + "logprobs" + ]["content"][0] + vllm_http_server_generated_token_id = int( + vllm_http_server_generated_token["token"].removeprefix("token_id:") + ) + + async for _, generate_result in vllm_generation.generate_async( + BatchedDataDict[GenerationDatumSpec]( + { + "input_ids": torch.tensor([initial_tokenized_query_ids]), + "input_lengths": torch.tensor([len(initial_tokenized_query_ids)]), + } + ) + ): + pass + + generate_generated_token_id = generate_result["output_ids"][0][ + len(initial_tokenized_query_ids) + ].item() + + # We just check the first token here to check the alignment + assert vllm_http_server_generated_token_id == generate_generated_token_id + + # Clean up + vllm_generation.shutdown() 
+ + @pytest.mark.timeout(180) @pytest.mark.parametrize("tensor_parallel_size", [1, 2]) -@pytest.mark.parametrize("enable_dtensor", [True, False]) +@pytest.mark.parametrize("vllm_precision", ["bfloat16", "fp8"]) def test_vllm_weight_update_and_prefix_cache_reset( - cluster, tokenizer, tensor_parallel_size, enable_dtensor + cluster, tokenizer, tensor_parallel_size, vllm_precision ): """Test that the vLLM prefix cache is correctly reset when weights change.""" + + if vllm_precision == "fp8": + major_capability, _ = torch.cuda.get_device_capability() + if major_capability < 9: + pytest.skip( + f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)." + ) + from nemo_rl.models.policy.lm_policy import Policy # Create configs vllm_config = deepcopy(basic_vllm_test_config) vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) vllm_config["vllm_cfg"]["tensor_parallel_size"] = tensor_parallel_size + vllm_config["vllm_cfg"]["precision"] = vllm_precision + if tensor_parallel_size > 1: vllm_config["vllm_kwargs"] = {"distributed_executor_backend": "ray"} @@ -982,11 +1598,11 @@ def test_vllm_weight_update_and_prefix_cache_reset( ) print("Updating vLLM weights from HF policy...") - grouped_param_keys = lm_policy.prepare_weights_for_ipc() - for keys in grouped_param_keys: - ipc_handles = lm_policy.get_weights_ipc_handles(keys) - update_success = vllm_policy.update_weights_from_ipc_handles(ipc_handles) - assert update_success, "Weight update should succeed" + + buffer_size_bytes = int(lm_policy.get_free_memory_bytes() * 0.3) + lm_policy.stream_weights_via_ipc_zmq(buffer_size_bytes=buffer_size_bytes) + update_success = vllm_policy.update_weights_via_ipc_zmq() + assert update_success, "Weight update should succeed" print("vLLM weights successfully updated.") print("Running Generation 2 (Weights Updated, Cache Still Active)...") @@ -1024,8 +1640,7 @@ def test_vllm_weight_update_and_prefix_cache_reset( torch.cuda.empty_cache() 
-@pytest.mark.parametrize("enable_dtensor", [True, False]) -def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): +def test_vllm_weight_update_memory(cluster, tokenizer): """Test that vLLM streaming weight update and can save memory.""" from nemo_rl.models.policy.lm_policy import Policy @@ -1064,7 +1679,7 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): lm_policy, vllm_policy, vllm_config["colocated"]["enabled"], - _refit_buffer_size_gb=1, + _refit_buffer_size_gb=1.5, ) gpu_infos = ray.get([w.get_gpu_info.remote() for w in workers]) @@ -1084,12 +1699,8 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): assert current_reserved == 0.0, "Memory should be 0 after refit completed" # memory threshold: memory during non-streaming weight update on 0.6B model on 2 GPUs # memory during streaming weight update should less than this baseline threshold - if enable_dtensor: - assert peak_allocated < 4005, "Peak allocated memory should < 4005 MB" - assert peak_reserved < 4016, "Peak reserved memory should < 4016 MB" - else: - assert peak_allocated < 5736, "Peak allocated memory should < 5736 MB" - assert peak_reserved < 5748, "Peak reserved memory should < 5748 MB" + assert peak_allocated < 4005, "Peak allocated memory should < 4005 MB" + assert peak_reserved < 4016, "Peak reserved memory should < 4016 MB" # Clean up vllm_policy.shutdown() @@ -1097,10 +1708,7 @@ def test_vllm_weight_update_memory(cluster, tokenizer, enable_dtensor): @pytest.mark.parametrize("is_eval", [True, False]) -@pytest.mark.parametrize("enable_dtensor", [True, False]) -def test_vllm_generation_with_stop( - cluster, test_input_data, tokenizer, is_eval, enable_dtensor -): +def test_vllm_generation_with_stop(cluster, test_input_data, tokenizer, is_eval): """Test vLLM generation with stop.""" from nemo_rl.models.policy.lm_policy import Policy @@ -1200,12 +1808,14 @@ def test_vllm_non_divisible_batch_handling(policy): @pytest.mark.asyncio 
@pytest.mark.parametrize("async_engine", [True, False]) @pytest.mark.parametrize("tensor_parallel_size", [1, 2]) -async def test_vllm_refit_non_collocated_update_weights( +@pytest.mark.parametrize("policy_type", ["dtensor", "megatron"]) +async def test_vllm_refit_non_colocated_update_weights( policy_cluster_separate, tokenizer, test_input_data, async_engine, tensor_parallel_size, + policy_type, ): # Skip tensor_parallel_size == 2 until we have resources in CI if tensor_parallel_size == 2: @@ -1223,23 +1833,47 @@ async def test_vllm_refit_non_collocated_update_weights( "Test requires at least two GPUs to run policies on separate clusters." ) - # Create Policy on its own cluster - dtensor_config = deepcopy(basic_dtensor_test_config) - dtensor_config["generation"]["colocated"]["enabled"] = False - lm_policy = Policy(policy_cluster_separate, dtensor_config, tokenizer) + # Get policy config + if policy_type == "dtensor": + lm_config = deepcopy(basic_dtensor_test_config) + else: + assert policy_type == "megatron" + lm_config = get_basic_megatron_test_config(tp=1, pp=1, precision="float32") + lm_config["generation"]["colocated"]["enabled"] = False - # Create VllmGeneration policy on its own cluster + # Get vllm config vllm_config = deepcopy(basic_vllm_test_config) vllm_config = configure_generation_config(vllm_config, tokenizer, is_eval=True) vllm_config["vllm_cfg"]["async_engine"] = async_engine vllm_config["vllm_cfg"]["tensor_parallel_size"] = tensor_parallel_size vllm_config["colocated"]["enabled"] = False + + # Megatron config with Qwen2.5-0.5B + if policy_type == "megatron": + model_name = "Qwen/Qwen2.5-0.5B" + tokenizer = get_tokenizer({"name": model_name}) + + lm_config["model_name"] = model_name + lm_config["tokenizer"]["name"] = model_name + + vllm_config["model_name"] = model_name + vllm_config["tokenizer"]["name"] = model_name + + # Create Policy and VllmGeneration + lm_policy = Policy(policy_cluster_separate, lm_config, tokenizer) vllm_generation = 
VllmGeneration(generation_cluster_separate, vllm_config) # initialize collective communication for update weights - ip, port = ray.get(_get_node_ip_and_free_port.remote()) - futures_train = lm_policy.init_collective(ip, port, world_size=2) - futures_inference = vllm_generation.init_collective(ip, port, world_size=2) + ip, port = policy_cluster_separate.get_master_address_and_port() + train_world_size = policy_cluster_separate.world_size() + inference_world_size = generation_cluster_separate.world_size() + world_size = train_world_size + inference_world_size + futures_train = lm_policy.init_collective( + ip, port, world_size=world_size, train_world_size=train_world_size + ) + futures_inference = vllm_generation.init_collective( + ip, port, world_size=world_size, train_world_size=train_world_size + ) ray.get(futures_train + futures_inference) # prepare refit info @@ -1247,9 +1881,7 @@ async def test_vllm_refit_non_collocated_update_weights( vllm_generation.prepare_refit_info(state_dict_info) print("refitting vllm policy...") - refit_policy_generation( - lm_policy, vllm_generation, vllm_config["colocated"]["enabled"] - ) + refit_policy_generation(lm_policy, vllm_generation, False) # test generate if async_engine: @@ -1258,12 +1890,23 @@ async def test_vllm_refit_non_collocated_update_weights( ) else: outputs = vllm_generation.generate(test_input_data, greedy=True) + output_ids = outputs["output_ids"] generated_texts = tokenizer.batch_decode(output_ids, skip_special_tokens=True) - assert generated_texts == [ - "Hello, my name is Lina. I'm", - "The capital of France is Paris. The capital of", - ], "Output should be the same as the expected output" + + if policy_type == "dtensor": + expected_texts = [ + "Hello, my name is Lina. I'm", + "The capital of France is Paris. The capital of", + ] + else: + expected_texts = [ + "Hello, my name is Kaitlin and I", + "The capital of France is Paris. 
It is the", + ] + assert generated_texts == expected_texts, ( + "Output should be the same as the expected output" + ) # Clean up vllm_generation.shutdown() @@ -1274,16 +1917,25 @@ async def test_vllm_refit_non_collocated_update_weights( print(f"Error during generation_cluster_separate shutdown: {e}") -@pytest.mark.timeout(210) +@pytest.mark.timeout(360) @pytest.mark.parametrize("tensor_parallel_size", [1, 2]) +@pytest.mark.parametrize("vllm_precision", ["bfloat16", "fp8"]) def test_vllm_generation_with_megatron_training( - cluster, tokenizer, tensor_parallel_size + cluster, tokenizer, tensor_parallel_size, vllm_precision ): """Test that uses vLLM for generation and Megatron policy for training and logprob computation. This test validates that vLLM and Megatron policies can work together. """ + # Skip the fp8 tests if the GPU is not H100 or newer (compute capability < 9.0) + if vllm_precision == "fp8": + major_capability, _ = torch.cuda.get_device_capability() + if major_capability < 9: + pytest.skip( + f"Skipping FP8 test. GPU compute capability {major_capability}.0 is < 9.0 (H100 required)." 
+ ) + if cluster.num_gpus_per_node < tensor_parallel_size: pytest.skip(f"Need at least {tensor_parallel_size} GPUs for this test") @@ -1298,6 +1950,7 @@ def test_vllm_generation_with_megatron_training( vllm_config["model_name"] = model_name vllm_config["tokenizer"]["name"] = model_name vllm_config["vllm_cfg"]["async_engine"] = False + vllm_config["vllm_cfg"]["precision"] = vllm_precision vllm_config = configure_generation_config(vllm_config, test_tokenizer) # Megatron config with same model @@ -1455,7 +2108,9 @@ def test_vllm_megatron_weight_update_memory(cluster, tokenizer): ) # Megatron config with same model - megatron_config = get_basic_megatron_test_config(tp=1, pp=1, precision="float32") + megatron_config = get_basic_megatron_test_config( + tp=1, pp=1, precision="float32", empty_unused_memory_level=1 + ) megatron_config["model_name"] = model_name megatron_config["tokenizer"]["name"] = model_name @@ -1482,7 +2137,7 @@ def test_vllm_megatron_weight_update_memory(cluster, tokenizer): megatron_policy, vllm_policy, vllm_config["colocated"]["enabled"], - _refit_buffer_size_gb=1, + _refit_buffer_size_gb=1.5, ) gpu_infos = ray.get([w.get_gpu_info.remote() for w in workers]) diff --git a/tests/unit/models/generation/test_vllm_large_model.py b/tests/unit/models/generation/test_vllm_large_model.py index 7b93ef46d1..89eaece234 100644 --- a/tests/unit/models/generation/test_vllm_large_model.py +++ b/tests/unit/models/generation/test_vllm_large_model.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os from copy import deepcopy import pytest @@ -45,6 +44,7 @@ "precision": "bfloat16", "tensor_parallel_size": 8, "pipeline_parallel_size": 2, + "expert_parallel_size": 1, "gpu_memory_utilization": 0.7, "max_model_len": 1024, "async_engine": True, @@ -63,14 +63,6 @@ } -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - yield - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - @pytest.fixture(scope="function") def two_node_cluster(): """Create a virtual cluster with 2 nodes for testing large models.""" @@ -176,7 +168,7 @@ async def test_vllm_large_model( # Extract in correct order outputs = [item for _, item in collected_indexed_outputs] - pad_token_id = async_policy.cfg.get("pad_token_id", tokenizer.pad_token_id) + pad_token_id = async_policy.cfg.get("_pad_token_id", tokenizer.pad_token_id) outputs = BatchedDataDict.from_batches( outputs, pad_value_dict={"output_ids": pad_token_id, "logprobs": 0.0}, diff --git a/tests/unit/models/generation/test_vllm_logprobs_mode.py b/tests/unit/models/generation/test_vllm_logprobs_mode.py new file mode 100644 index 0000000000..ccc7fe8112 --- /dev/null +++ b/tests/unit/models/generation/test_vllm_logprobs_mode.py @@ -0,0 +1,289 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Test vLLM logprobs_mode functionality to verify processed_logprobs matches expectations.""" + +import pytest +import torch + + +@pytest.mark.vllm +def test_processed_logprobs_matches_manual_computation(): + """Test that processed_logprobs mode matches manual computation from HF ground truth. + + Mathematical Framework: + ======================= + For a full vocabulary V with logits x_i, after temperature τ and top_k/top_p filtering: + + processed_logprob(x_i) = log_softmax(filter(x_i/τ)) + + Where filter applies top_k and top_p masking to create filtered set F ⊆ V: + log_softmax_filtered(x_i) = x_i/τ - log(Σ_{j∈F} exp(x_j/τ)) + + Test Strategy: + ============== + 1. Generate 3 tokens with vLLM (processed_logprobs mode, float32) + → Get sampled token IDs and vLLM's processed logprobs + + 2. Load HuggingFace model (float32) and run single forward pass + → Get ground truth logits for all 3 tokens from full vocabulary + + 3. Manually compute for each token: + - Apply temperature scaling: x_i/τ + - Apply top_k/top_p filtering: apply_top_k_top_p(x_i/τ, k, p) + - Compute log_softmax over filtered tokens + + 4. Validate: assert vLLM logprobs ≈ manual logprobs using torch.testing.assert_close + + Notes: + =============== + - HF model provides FULL vocabulary logits (no missing probability mass) + - Tried using raw_logits, but get hangs if SamplingParams.logprobs is too large (or -1) + - Both vLLM and HF use float32 for consistent numerical precision + - Validates our apply_top_k_top_p implementation matches vLLM exactly + + Note: Run with: uv run --extra vllm --group test pytest ... 
--vllm-only + """ + + from transformers import AutoModelForCausalLM, AutoTokenizer + from vllm import LLM, SamplingParams + + from nemo_rl.models.policy.utils import apply_top_k_top_p + + # Use a small model for fast testing + model_name = "facebook/opt-125m" + + # Sampling parameters (mathematical notation in docstring): + # τ (tau) = temperature, k = top_k, p = top_p + temperature = 1.5 # τ: temperature scaling factor + num_logprobs = 500 # N: get top-500 logprobs from vLLM (for validation) + top_k = 500 # k: top_k sampling parameter + top_p = 0.9 # p: nucleus sampling threshold + seed = 42 # Deterministic seed for reproducibility + num_tokens = 3 # Generate 3 tokens for testing + + # New approach: Use HuggingFace to get ground truth logits, then compare against + # vLLM's processed_logprobs. This avoids: + # - O(n) hang with large logprobs + # - Needing 2 vLLM instances + # - Token set mismatch issues + + # Common parameters for both LLMs + llm_kwargs = { + "model": model_name, + "seed": seed, + "max_model_len": 128, + "gpu_memory_utilization": 0.3, + "enforce_eager": True, + "enable_prefix_caching": False, + "dtype": "float32", # Use float32 for maximum precision + } + + prompt = "The quick brown fox jumps over the" + + # Step 1: Use vLLM to generate tokens and get which ones were sampled + print( + f"Step 1: Generating {num_tokens} tokens with vLLM (processed_logprobs mode)..." 
+ ) + llm_vllm = LLM( + **llm_kwargs, logprobs_mode="processed_logprobs", max_logprobs=num_logprobs + ) + + sampling_params = SamplingParams( + temperature=temperature, + top_k=top_k, + top_p=top_p, + max_tokens=num_tokens, + logprobs=num_logprobs, + seed=seed, + ) + + outputs_vllm = llm_vllm.generate([prompt], sampling_params=sampling_params) + del llm_vllm # Free GPU memory + + # Extract vLLM's processed logprobs and sampled token IDs + vllm_token_ids = [] + vllm_logprobs = [] + + for output in outputs_vllm[0].outputs: + for sampled_token_id, logprob_dict in zip(output.token_ids, output.logprobs): + vllm_token_ids.append(sampled_token_id) + vllm_logprobs.append(logprob_dict[sampled_token_id].logprob) + + print(f"vLLM sampled tokens: {vllm_token_ids}") + print(f"vLLM processed logprobs: {[f'{lp:.4f}' for lp in vllm_logprobs]}") + + # Step 2: Use HuggingFace model to get ground truth logits and manually compute + print("\nStep 2: Loading HuggingFace model to get ground truth logits...") + tokenizer = AutoTokenizer.from_pretrained(model_name) + hf_model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.float32, # Use float32 for maximum precision + ).cuda() + hf_model.eval() + + # Tokenize the prompt and append all generated tokens for a single forward pass + input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda() + + # Construct full sequence: prompt + all 3 generated tokens + generated_ids = torch.tensor( + [vllm_token_ids], dtype=torch.long, device=input_ids.device + ) + full_sequence = torch.cat([input_ids, generated_ids], dim=1) + + print(f"Single forward pass with sequence length: {full_sequence.shape[1]}") + + # Single forward pass to get all logits + with torch.no_grad(): + outputs_hf = hf_model(full_sequence) + all_logits = outputs_hf.logits[0] # Shape: [seq_len, vocab_size] + + # Extract logits at positions where we need to predict each generated token + # For token i, we need logits at position (prompt_len + i - 1) + 
prompt_len = input_ids.shape[1] + expected_logprobs = [] + + for i, sampled_token_id in enumerate(vllm_token_ids): + # Get logits right before this token was generated + logits = all_logits[prompt_len + i - 1, :] # Shape: [vocab_size] + + # Manually compute processed logprobs following vLLM's processing pipeline: + # Step 1: Apply temperature scaling → x_i/τ + # All computations in float32 for maximum precision + scaled_logits = logits / temperature + + # Step 2 & 3: Apply top_k and top_p filtering using vLLM's implementation + scaled_logits_batched = scaled_logits.unsqueeze(0).unsqueeze( + 0 + ) # [1, 1, vocab_size] + filtered_logits_batched = apply_top_k_top_p( + scaled_logits_batched, top_k=top_k, top_p=top_p + ) + filtered_logits = filtered_logits_batched.squeeze(0).squeeze(0) # [vocab_size] + + # Step 4: Compute log_softmax over filtered tokens + manual_logprobs = torch.nn.functional.log_softmax(filtered_logits, dim=0) + + # Get the logprob for the sampled token + expected_logprobs.append(manual_logprobs[sampled_token_id].item()) + + print( + f"HF model computed logprobs (float32): {[f'{lp:.4f}' for lp in expected_logprobs]}" + ) + + # Step 3: Compare vLLM's processed_logprobs against our HF-based manual computation + print("\nStep 3: Comparing logprobs...") + expected_logprobs_tensor = torch.tensor(expected_logprobs) + vllm_logprobs_tensor = torch.tensor(vllm_logprobs) + + # Print individual comparisons + print("\nPer-token comparison:") + for i in range(num_tokens): + diff = abs(vllm_logprobs[i] - expected_logprobs[i]) + print( + f"Token {i} (ID={vllm_token_ids[i]}): " + f"manual={expected_logprobs[i]:.6f}, " + f"vllm={vllm_logprobs[i]:.6f}, " + f"diff={diff:.6f}" + ) + + # Use torch.testing.assert_close to validate the match + print("\nValidating match with torch.testing.assert_close...") + torch.testing.assert_close( + vllm_logprobs_tensor, + expected_logprobs_tensor, + ) + + print("✓ Test passed: processed_logprobs match manual computation from HF 
model!") + print(f" Tokens: {vllm_token_ids}") + print(" Validated with rtol=1e-3, atol=1e-2") + + +@pytest.mark.vllm +@pytest.mark.parametrize( + "top_k,top_p,test_name", + [ + (100, 0.9, "top_k + top_p"), + (None, 0.9, "top_p only"), + (100, None, "top_k only"), + (None, None, "passthrough (no filtering)"), + ], +) +def test_apply_top_k_top_p_matches_vllm_upstream(top_k, top_p, test_name): + """Test that our apply_top_k_top_p implementation matches vLLM's upstream version. + + This test directly compares our simplified scalar-parameter implementation in + nemo_rl.models.policy.utils against vLLM's batched tensor-parameter implementation. + + Key differences in interfaces: + - Our version: scalar top_k/top_p, expects shape [batch, seq, vocab] + - vLLM version: tensor top_k/top_p (one per batch), expects shape [batch, vocab] + + This test validates that for the same logits and parameters, both produce identical results. + + Args: + top_k: Top-k value to test (or None) + top_p: Top-p value to test (or None) + test_name: Description of the test case + """ + from vllm.v1.sample.ops.topk_topp_sampler import ( + apply_top_k_top_p as vllm_apply_top_k_top_p, + ) + + from nemo_rl.models.policy.utils import apply_top_k_top_p + + # Test configuration + batch_size = 4 + seq_len = 2 + vocab_size = 1000 + + # Generate synthetic logits (deterministic for reproducibility) + torch.manual_seed(42) + logits_3d = torch.randn(batch_size, seq_len, vocab_size, dtype=torch.float32) + + print(f"Testing: {test_name}") + + # Our implementation: expects [batch, seq, vocab], takes scalar k/p + our_result = apply_top_k_top_p(logits_3d.clone(), top_k=top_k, top_p=top_p) + + # vLLM upstream: expects [batch, vocab], takes tensor k/p with shape [batch] + # Process each sequence position separately (vLLM doesn't batch over seq_len) + vllm_results = [] + for seq_idx in range(seq_len): + logits_2d = logits_3d[:, seq_idx, :].clone() # [batch, vocab] + + # Convert scalar parameters to tensors for vLLM 
+ k_tensor = ( + None + if top_k is None + else torch.full((batch_size,), top_k, dtype=torch.long) + ) + p_tensor = ( + None + if top_p is None + else torch.full((batch_size,), top_p, dtype=torch.float32) + ) + + vllm_result = vllm_apply_top_k_top_p(logits_2d, k=k_tensor, p=p_tensor) + vllm_results.append(vllm_result) + + vllm_result_3d = torch.stack(vllm_results, dim=1) # [batch, seq, vocab] + + # Compare results + torch.testing.assert_close( + our_result, + vllm_result_3d, + msg=f"Our apply_top_k_top_p doesn't match vLLM upstream ({test_name})", + ) + print(f"✓ Results match for {test_name}") diff --git a/tests/unit/models/generation/test_vllm_utils.py b/tests/unit/models/generation/test_vllm_utils.py new file mode 100644 index 0000000000..4093b4c5ae --- /dev/null +++ b/tests/unit/models/generation/test_vllm_utils.py @@ -0,0 +1,113 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import torch + +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.models.generation.vllm.utils import ( + format_prompt_for_vllm_generation, +) + + +def _mk_inputs(batch_size: int = 2, seq_len: int = 5): + input_ids = torch.arange(batch_size * seq_len).view(batch_size, seq_len) + # make second example shorter + input_lengths = torch.tensor([seq_len, seq_len - 2]) + return input_ids, input_lengths + + +def test_vllm_utils_regular_llm_path(): + input_ids, input_lengths = _mk_inputs() + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + } + ) + prompts = format_prompt_for_vllm_generation(data) + assert isinstance(prompts, list) and len(prompts) == 2 + # first has full length + assert prompts[0]["prompt_token_ids"] == input_ids[0].tolist() + # second trimmed by input_lengths + assert prompts[1]["prompt_token_ids"] == input_ids[1, : input_lengths[1]].tolist() + + +def test_vllm_utils_vlm_with_images_and_text(): + # Batch with two samples + # both have content; first has one image, second has two images + input_ids, input_lengths = _mk_inputs() + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "vllm_content": ["<s>user: hi</s>", "<s>user: hello</s>"], + "vllm_images": [["img1"], ["img2a", "img2b"]], + } + ) + + prompts = format_prompt_for_vllm_generation(data) + assert len(prompts) == 2 + assert prompts[0]["prompt"] == "<s>user: hi</s>" + assert prompts[0]["multi_modal_data"]["image"] == "img1" + assert prompts[1]["prompt"] == "<s>user: hello</s>" + assert prompts[1]["multi_modal_data"]["image"] == ["img2a", "img2b"] + + +def test_vllm_utils_vlm_with_missing_images_fallback_to_tokens(): + input_ids, input_lengths = _mk_inputs() + # images None triggers fallback + data_none = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "vllm_content": ["a", "b"], + "vllm_images": None, + } + ) + prompts = 
format_prompt_for_vllm_generation(data_none) + assert all("prompt_token_ids" in p for p in prompts) + + # images empty per sample also triggers fallback + data_empty = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "vllm_content": ["a", "b"], + "vllm_images": [[], []], + } + ) + prompts = format_prompt_for_vllm_generation(data_empty) + assert all("prompt_token_ids" in p for p in prompts) + + +def test_vllm_utils_vlm_with_none_content_fallback_to_tokens_and_sample_idx(): + input_ids, input_lengths = _mk_inputs() + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "vllm_content": [None, None], + "vllm_images": [["img"], ["img"]], + } + ) + # even though images provided, None content should fallback to tokens + prompts_all = format_prompt_for_vllm_generation(data) + assert len(prompts_all) == 2 + assert all("prompt_token_ids" in p for p in prompts_all) + + # single-sample API + p0 = format_prompt_for_vllm_generation(data, sample_idx=0) + p1 = format_prompt_for_vllm_generation(data, sample_idx=1) + assert isinstance(p0, dict) and isinstance(p1, dict) + assert "prompt_token_ids" in p0 and "prompt_token_ids" in p1 diff --git a/tests/unit/models/generation/test_vllmasyncgenerationworker_replace_prefix_worker.json b/tests/unit/models/generation/test_vllmasyncgenerationworker_replace_prefix_worker.json new file mode 100644 index 0000000000..89370edf9a --- /dev/null +++ b/tests/unit/models/generation/test_vllmasyncgenerationworker_replace_prefix_worker.json @@ -0,0 +1,5 @@ +{ + "og_model_token_ids": [151644, 8948, 198, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 88821, 497, 330, 4684, 788, 330, 47866, 264, 35972, 7493, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 
13193, 788, 5212, 9413, 788, 5212, 4684, 788, 330, 32, 2697, 13027, 7493, 10465, 330, 2102, 788, 330, 16041, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9413, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 5955, 28534, 497, 330, 4684, 788, 330, 8890, 279, 10981, 4226, 323, 1779, 421, 432, 374, 4396, 13, 1096, 1917, 374, 15022, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 9217, 788, 5212, 4684, 788, 330, 2008, 19586, 4226, 10465, 330, 2102, 788, 330, 16141, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9217, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 38, 648, 2782, 23108, 458, 5461, 315, 220, 18, 16, 20, 8756, 369, 220, 18, 2849, 13, 73210, 23108, 264, 2790, 315, 220, 16, 11, 17, 20, 15, 8756, 916, 825, 2003, 13, 2585, 1657, 8756, 42626, 1521, 73210, 6541, 30, 151645, 198, 151644, 77091, 198, 5338, 11, 1077, 594, 1477, 700, 1246, 1657, 8756, 479, 648, 2782, 23108, 304, 2790, 916, 220, 18, 2849, 1447, 14085, 198, 59, 1318, 90, 7595, 8756, 16227, 553, 479, 648, 2782, 92, 284, 220, 18, 16, 20, 1124, 11, 1124, 1318, 90, 76, 3658, 44739, 92, 1124, 15136, 220, 18, 1124, 11, 1124, 1318, 90, 13778, 532, 14085, 271, 12209, 11, 582, 686, 9429, 419, 311, 279, 2790, 8756, 16227, 553, 73210, 11, 879, 23108, 220, 16, 11, 17, 20, 15, 8756, 304, 825, 2003, 382, 23949, 11, 582, 686, 11047, 1246, 1657, 8756, 42626, 73210, 23108, 553, 32256, 287, 479, 648, 2782, 594, 2790, 504, 73210, 594, 2790, 382, 10061, 752, 12564, 419, 3019, 553, 3019, 382, 151657, 198, 4913, 606, 788, 330, 88821, 497, 330, 16370, 788, 5212, 9413, 788, 
330, 18, 16, 20, 353, 220, 18, 95642, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 24, 19, 20, 198, 151666, 151645, 198, 151644, 77091, 198], + "model_token_ids": [151644, 8948, 198, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 88821, 497, 330, 4684, 788, 330, 47866, 264, 35972, 7493, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 9413, 788, 5212, 4684, 788, 330, 32, 2697, 13027, 7493, 10465, 330, 2102, 788, 330, 16041, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9413, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 5955, 28534, 497, 330, 4684, 788, 330, 8890, 279, 10981, 4226, 323, 1779, 421, 432, 374, 4396, 13, 1096, 1917, 374, 15022, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 9217, 788, 5212, 4684, 788, 330, 2008, 19586, 4226, 10465, 330, 2102, 788, 330, 16141, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9217, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 38, 648, 2782, 23108, 458, 5461, 315, 220, 18, 16, 20, 8756, 369, 220, 18, 2849, 13, 73210, 23108, 264, 2790, 315, 220, 16, 11, 17, 20, 15, 8756, 916, 825, 2003, 13, 2585, 1657, 8756, 42626, 1521, 73210, 6541, 30, 151645, 198, 151644, 77091, 198, 5338, 11, 1077, 594, 1477, 700, 1246, 1657, 8756, 479, 648, 2782, 23108, 304, 2790, 916, 220, 18, 2849, 1447, 14085, 198, 59, 1318, 90, 7595, 8756, 
16227, 553, 479, 648, 2782, 92, 284, 220, 18, 16, 20, 1124, 11, 1124, 1318, 90, 76, 3658, 44739, 92, 1124, 15136, 220, 18, 1124, 11, 1124, 1318, 90, 13778, 532, 14085, 271, 12209, 11, 582, 686, 9429, 419, 311, 279, 2790, 8756, 16227, 553, 73210, 11, 879, 23108, 220, 16, 11, 17, 20, 15, 8756, 304, 825, 2003, 382, 23949, 11, 582, 686, 11047, 1246, 1657, 8756, 42626, 73210, 23108, 553, 32256, 287, 479, 648, 2782, 594, 2790, 504, 73210, 594, 2790, 382, 10061, 752, 12564, 419, 3019, 553, 3019, 624, 151657, 198, 4913, 606, 788, 330, 88821, 497, 330, 16370, 788, 5212, 9413, 788, 330, 18, 16, 20, 353, 220, 18, 95642, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 24, 19, 20, 198, 151666, 151645, 198, 151644, 77091, 198], + "template_token_ids": [151644, 8948, 198, 2, 13852, 271, 2610, 1231, 1618, 825, 476, 803, 5746, 311, 7789, 448, 279, 1196, 3239, 382, 2610, 525, 3897, 448, 729, 32628, 2878, 366, 15918, 1472, 15918, 29, 11874, 9492, 510, 27, 15918, 397, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 88821, 497, 330, 4684, 788, 330, 47866, 264, 35972, 7493, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 9413, 788, 5212, 4684, 788, 330, 32, 2697, 13027, 7493, 10465, 330, 2102, 788, 330, 16041, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9413, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 4913, 1313, 788, 330, 1688, 497, 330, 1688, 788, 5212, 606, 788, 330, 5955, 28534, 497, 330, 4684, 788, 330, 8890, 279, 10981, 4226, 323, 1779, 421, 432, 374, 4396, 13, 1096, 1917, 374, 15022, 10465, 330, 13786, 788, 5212, 1313, 788, 330, 1700, 497, 330, 13193, 788, 5212, 9217, 788, 5212, 4684, 788, 330, 2008, 19586, 4226, 10465, 330, 2102, 788, 330, 16141, 497, 330, 1313, 788, 330, 917, 9207, 2137, 330, 6279, 788, 4383, 9217, 7914, 330, 35499, 7903, 788, 895, 2137, 330, 6627, 788, 830, 11248, 522, 15918, 1339, 2461, 1817, 729, 1618, 11, 470, 264, 2951, 1633, 448, 729, 829, 323, 5977, 
2878, 220, 151657, 151658, 11874, 9492, 510, 151657, 198, 4913, 606, 788, 366, 1688, 11494, 8066, 330, 16370, 788, 366, 2116, 56080, 40432, 31296, 151658, 151645, 198, 151644, 872, 198, 38, 648, 2782, 23108, 458, 5461, 315, 220, 18, 16, 20, 8756, 369, 220, 18, 2849, 13, 73210, 23108, 264, 2790, 315, 220, 16, 11, 17, 20, 15, 8756, 916, 825, 2003, 13, 2585, 1657, 8756, 42626, 1521, 73210, 6541, 30, 151645, 198, 151644, 77091, 198, 5338, 11, 1077, 594, 1477, 700, 1246, 1657, 8756, 479, 648, 2782, 23108, 304, 2790, 916, 220, 18, 2849, 1447, 14085, 198, 59, 1318, 90, 7595, 8756, 16227, 553, 479, 648, 2782, 92, 284, 220, 18, 16, 20, 1124, 11, 1124, 1318, 90, 76, 3658, 44739, 92, 1124, 15136, 220, 18, 1124, 11, 1124, 1318, 90, 13778, 532, 14085, 271, 12209, 11, 582, 686, 9429, 419, 311, 279, 2790, 8756, 16227, 553, 73210, 11, 879, 23108, 220, 16, 11, 17, 20, 15, 8756, 304, 825, 2003, 382, 23949, 11, 582, 686, 11047, 1246, 1657, 8756, 42626, 73210, 23108, 553, 32256, 287, 479, 648, 2782, 594, 2790, 504, 73210, 594, 2790, 382, 10061, 752, 12564, 419, 3019, 553, 3019, 382, 151657, 198, 4913, 606, 788, 330, 88821, 497, 330, 16370, 788, 5212, 9413, 788, 330, 18, 16, 20, 353, 220, 18, 95642, 151658, 151645, 198, 151644, 872, 198, 151665, 198, 24, 19, 20, 198, 151666, 151645, 198, 151644, 77091, 198] +} diff --git a/tests/unit/models/huggingface/test_common.py b/tests/unit/models/huggingface/test_common.py index 95da64b0b4..e1f7b948aa 100644 --- a/tests/unit/models/huggingface/test_common.py +++ b/tests/unit/models/huggingface/test_common.py @@ -39,7 +39,6 @@ ) def test_gemma_models(model_name): assert is_gemma_model(model_name) - assert ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK.matches(model_name) assert ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(model_name) @@ -54,5 +53,4 @@ def test_gemma_models(model_name): ) def test_non_gemma_models(model_name): assert not is_gemma_model(model_name) - assert not ModelFlag.SKIP_DTENSOR_TIED_WEIGHTS_CHECK.matches(model_name) assert not 
ModelFlag.VLLM_LOAD_FORMAT_AUTO.matches(model_name) diff --git a/tests/unit/models/huggingface/test_smolvlm_embeddings_bug.py b/tests/unit/models/huggingface/test_smolvlm_embeddings_bug.py new file mode 100644 index 0000000000..c1e4e927b9 --- /dev/null +++ b/tests/unit/models/huggingface/test_smolvlm_embeddings_bug.py @@ -0,0 +1,173 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from torch import nn +from transformers import AutoModelForImageTextToText, AutoProcessor + + +class SmolVLMVisionEmbeddingsReference(nn.Module): + """ + Previous (correct) implementation in transformers<=4.54.1. Copied from https://github.com/huggingface/transformers/blob/4.54.1/src/transformers/models/smolvlm/modeling_smolvlm.py#L101-L156 + + Remove this test once upstream bug is fixed. 
+ """ + + def __init__(self, config): + super().__init__() + self.embed_dim = config.hidden_size + self.image_size = config.image_size + self.patch_size = config.patch_size + + self.patch_embedding = nn.Conv2d( + in_channels=config.num_channels, + out_channels=self.embed_dim, + kernel_size=self.patch_size, + stride=self.patch_size, + padding="valid", + ) + + self.num_patches_per_side = self.image_size // self.patch_size + self.num_patches = self.num_patches_per_side**2 + self.num_positions = self.num_patches + self.position_embedding = nn.Embedding(self.num_positions, self.embed_dim) + + def forward( + self, pixel_values: torch.FloatTensor, patch_attention_mask: torch.BoolTensor + ) -> torch.Tensor: + batch_size, _, max_im_h, max_im_w = pixel_values.shape + + patch_embeds = self.patch_embedding(pixel_values) + embeddings = patch_embeds.flatten(2).transpose(1, 2) + + max_nb_patches_h, max_nb_patches_w = ( + max_im_h // self.patch_size, + max_im_w // self.patch_size, + ) + boundaries = torch.arange( + 1 / self.num_patches_per_side, 1.0, 1 / self.num_patches_per_side + ) + position_ids = torch.full( + size=(batch_size, max_nb_patches_h * max_nb_patches_w), fill_value=0 + ) + + for batch_idx, p_attn_mask in enumerate(patch_attention_mask): + nb_patches_h = p_attn_mask[:, 0].sum() + nb_patches_w = p_attn_mask[0].sum() + + fractional_coords_h = torch.arange(0, 1 - 1e-6, 1 / nb_patches_h) + fractional_coords_w = torch.arange(0, 1 - 1e-6, 1 / nb_patches_w) + + bucket_coords_h = torch.bucketize( + fractional_coords_h, boundaries, right=True + ) + bucket_coords_w = torch.bucketize( + fractional_coords_w, boundaries, right=True + ) + + pos_ids = ( + bucket_coords_h[:, None] * self.num_patches_per_side + bucket_coords_w + ).flatten() + position_ids[batch_idx][p_attn_mask.view(-1).cpu()] = pos_ids + + position_ids = position_ids.to(self.position_embedding.weight.device) + embeddings = embeddings + self.position_embedding(position_ids) + return embeddings + + +def 
test_smolvlm_embeddings_differ_from_reference(): + # Remove once https://github.com/huggingface/transformers/issues/41190 is fixed and adopted. + + device = "cuda" if torch.cuda.is_available() else "cpu" + + model_path = "HuggingFaceTB/SmolVLM2-2.2B-Instruct" + processor = AutoProcessor.from_pretrained(model_path) + model = AutoModelForImageTextToText.from_pretrained( + model_path, torch_dtype=torch.bfloat16 + ) + model = model.to(device) + + messages = [ + { + "role": "user", + "content": [ + { + "type": "image", + "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg", + }, + {"type": "text", "text": "Can you describe this image?"}, + ], + } + ] + + inputs = processor.apply_chat_template( + messages, + add_generation_prompt=True, + tokenize=True, + return_dict=True, + return_tensors="pt", + ) + inputs = { + k: v.to(device) if isinstance(v, torch.Tensor) else v for k, v in inputs.items() + } + inputs = { + k: v.to(dtype=torch.bfloat16) + if isinstance(v, torch.Tensor) and v.is_floating_point() + else v + for k, v in inputs.items() + } + + patch_size = model.model.vision_model.patch_size + pixel_values = inputs["pixel_values"] # (bsz, num_images, 3, H, W) + bsz, num_images, _, H, W = pixel_values.shape + pixel_values = pixel_values.view(bsz * num_images, *pixel_values.shape[2:]) + + patch_attention_mask = torch.ones( + ( + bsz, + pixel_values.size(2) // patch_size, + pixel_values.size(3) // patch_size, + ), + device=pixel_values.device, + dtype=torch.bool, + ) + + # Get buggy/current embeddings module from installed transformers + embeddings_buggy = model.model.vision_model.embeddings + + with torch.no_grad(): + out_buggy = embeddings_buggy( + pixel_values=pixel_values, patch_attention_mask=patch_attention_mask + ) + + # Build reference embeddings and copy weights for apples-to-apples comparison + ref = SmolVLMVisionEmbeddingsReference(model.model.vision_model.config) + ref = ref.to(device=device, dtype=torch.bfloat16) + + # 
Copy the conv and embedding weights + ref.patch_embedding.load_state_dict(embeddings_buggy.patch_embedding.state_dict()) + ref.position_embedding.load_state_dict( + embeddings_buggy.position_embedding.state_dict() + ) + + with torch.no_grad(): + out_ref = ref( + pixel_values=pixel_values, patch_attention_mask=patch_attention_mask + ) + + # Assert outputs differ due to the upstream bug + are_equal = torch.allclose(out_buggy.float(), out_ref.float(), atol=0, rtol=0) + assert not are_equal, ( + "If this fails, that means the upstream bug has been fixed. You can close this issue: https://github.com/huggingface/transformers/issues/41190" + ) diff --git a/tests/unit/models/megatron/converters/test_qwen_conversion.py b/tests/unit/models/megatron/converters/test_qwen_conversion.py deleted file mode 100644 index 2debebaee0..0000000000 --- a/tests/unit/models/megatron/converters/test_qwen_conversion.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import socket -from contextlib import contextmanager -from tempfile import TemporaryDirectory - -import pytest -import torch -import torch.distributed as dist -from transformers import AutoConfig, AutoModelForCausalLM - - -@contextmanager -def temporary_distributed_context(): - if "MASTER_ADDR" in os.environ and "MASTER_PORT" in os.environ: - init_method = None - else: - # Find an available port dynamically - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("localhost", 0)) - addr, port = s.getsockname() - - init_method = f"tcp://{addr}:{port}" - - dist.init_process_group( - backend="gloo", init_method=init_method, world_size=1, rank=0 - ) - - from megatron.core import parallel_state - - parallel_state.initialize_model_parallel() - - from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed - - model_parallel_cuda_manual_seed(42) - - try: - yield - finally: - parallel_state.destroy_model_parallel() - dist.destroy_process_group() - - -def dummy_qwen3_megatron_moe_config(): - from nemo.collections.llm.gpt.model.qwen3 import Qwen3MoEConfig - - return Qwen3MoEConfig( - num_layers=2, - hidden_size=64, - num_attention_heads=4, - num_query_groups=2, - ffn_hidden_size=128, - moe_ffn_hidden_size=32, - num_moe_experts=2, - share_embeddings_and_output_weights=True, - kv_channels=16, - ) - - -def dummy_qwen3_megatron_dense_config(): - from nemo.collections.llm.gpt.model.qwen3 import Qwen3Config - - return Qwen3Config( - num_layers=2, - hidden_size=64, - num_attention_heads=4, - num_query_groups=2, - ffn_hidden_size=128, - share_embeddings_and_output_weights=False, - kv_channels=16, - ) - - -def create_dummy_hf_moe_config(): - """Create a dummy HF MoE config and save it to a temporary directory.""" - # Create a minimal HF config that matches the megatron config - hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-30B-A3B", trust_remote_code=True) - - # Update config to match our dummy megatron config - 
hf_config.num_hidden_layers = 2 - hf_config.hidden_size = 64 - hf_config.num_attention_heads = 4 - hf_config.num_key_value_heads = 2 - hf_config.intermediate_size = 128 - hf_config.moe_intermediate_size = 32 - hf_config.num_experts = 2 - hf_config.tie_word_embeddings = True - hf_config.head_dim = 16 - - return hf_config - - -def create_dummy_hf_dense_config(): - """Create a dummy HF dense config and save it to a temporary directory.""" - # Create a minimal HF config that matches the megatron config - hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True) - - # Update config to match our dummy megatron config - hf_config.num_hidden_layers = 2 - hf_config.hidden_size = 64 - hf_config.num_attention_heads = 4 - hf_config.num_key_value_heads = 2 - hf_config.intermediate_size = 128 - hf_config.tie_word_embeddings = False - hf_config.head_dim = 16 - - return hf_config - - -def create_model_and_converter(megatron_config, hf_config, model_name): - """Create megatron model and converter for testing.""" - - from nemo.collections.llm.gpt.model.qwen3 import Qwen3Model - - from nemo_rl.models.megatron.converters.common import MegatronToHFConverter - - # Create megatron model - model = Qwen3Model(megatron_config) - model.configure_model() - - # Create dummy HF config and save to temporary directory - with TemporaryDirectory() as tmp_dir: - hf_dir = os.path.join(tmp_dir, model_name) - hf_config.save_pretrained(hf_dir) - - # Create a dummy HF model to get the model class - dummy_model = AutoModelForCausalLM.from_config( - hf_config, trust_remote_code=True - ) - dummy_model.save_pretrained(hf_dir) - - original_state_dict = model.module.state_dict() - - converter = MegatronToHFConverter( - hf_model_name=hf_dir, - megatron_model=model.module, - ) - - converted_state_dict = converter.convert(original_state_dict, model.config) - - # Filter out _extra_state keys - original_state_dict = { - k: v for k, v in original_state_dict.items() if "_extra_state" not in k - 
} - - return original_state_dict, converted_state_dict, hf_config, model - - -def calculate_chunk_sizes(hf_config): - """Calculate chunk sizes for QKV tensor splitting.""" - q_chunk_size = hf_config.head_dim * ( - hf_config.num_attention_heads // hf_config.num_key_value_heads - ) - kv_chunk_size = hf_config.head_dim * 2 - return q_chunk_size, kv_chunk_size - - -def assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size -): - """Assert that attention tensors match between original and converted state dicts.""" - # Check q_layernorm - torch.testing.assert_close( - original_state_dict["decoder.layers.0.self_attention.q_layernorm.weight"], - converted_state_dict["model.layers.0.self_attn.q_norm.weight"], - ) - - # Check first layer q_proj - torch.testing.assert_close( - original_state_dict["decoder.layers.0.self_attention.linear_qkv.weight"][ - :q_chunk_size - ], - converted_state_dict["model.layers.0.self_attn.q_proj.weight"][:q_chunk_size], - ) - - # Check second layer q_proj - torch.testing.assert_close( - original_state_dict["decoder.layers.1.self_attention.linear_qkv.weight"][ - (q_chunk_size + kv_chunk_size) : (2 * q_chunk_size + kv_chunk_size) - ], - converted_state_dict["model.layers.1.self_attn.q_proj.weight"][ - q_chunk_size : (2 * q_chunk_size) - ], - ) - - -@pytest.mark.mcore -def test_conversion_to_hf_moe(): - """Test conversion of Qwen3 MoE model to HF format.""" - with temporary_distributed_context(): - mcore_config = dummy_qwen3_megatron_moe_config() - hf_config = create_dummy_hf_moe_config() - - original_state_dict, converted_state_dict, hf_config, model = ( - create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-moe") - ) - - # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys - # taking into account the qkv merging and the merging of the up and gate projections - assert 
len(original_state_dict) == len(converted_state_dict) - ( - 2 * hf_config.num_hidden_layers - + (hf_config.num_hidden_layers * hf_config.num_experts) - ) - - q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) - - # Check attention tensors - assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size - ) - - # Check MoE MLP tensors - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ - mcore_config.moe_ffn_hidden_size : - ], - converted_state_dict["model.layers.1.mlp.experts.0.up_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ - : mcore_config.moe_ffn_hidden_size - ], - converted_state_dict["model.layers.1.mlp.experts.0.gate_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.0.mlp.experts.linear_fc2.weight1"], - converted_state_dict["model.layers.0.mlp.experts.1.down_proj.weight"], - ) - - -@pytest.mark.mcore -def test_conversion_to_hf_dense(): - """Test conversion of Qwen3 dense model to HF format.""" - with temporary_distributed_context(): - mcore_config = dummy_qwen3_megatron_dense_config() - hf_config = create_dummy_hf_dense_config() - - original_state_dict, converted_state_dict, hf_config, model = ( - create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-dense") - ) - - # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys - # taking into account the qkv merging and the merging of the up and gate projections - assert len(original_state_dict) == len(converted_state_dict) - ( - 3 * hf_config.num_hidden_layers - ) - - q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) - - # Check attention tensors - assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size - ) - - # Check dense 
MLP tensors - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ - mcore_config.ffn_hidden_size : - ], - converted_state_dict["model.layers.1.mlp.up_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ - : mcore_config.ffn_hidden_size - ], - converted_state_dict["model.layers.1.mlp.gate_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.0.mlp.linear_fc2.weight"], - converted_state_dict["model.layers.0.mlp.down_proj.weight"], - ) diff --git a/tests/unit/models/policy/test_dtensor_worker.py b/tests/unit/models/policy/test_dtensor_worker.py index 33a91c37eb..58cde59f30 100644 --- a/tests/unit/models/policy/test_dtensor_worker.py +++ b/tests/unit/models/policy/test_dtensor_worker.py @@ -11,16 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-import os import pprint +import time import pytest import ray import torch - -# Define a custom marker for model configuration tests -pytestmark = pytest.mark.modelconfig - from transformers import AutoModelForCausalLM from nemo_rl.algorithms.interfaces import LossFunction @@ -38,10 +34,11 @@ def create_test_config( model_name: str, tp: int = 1, cp: int = 1, - sequence_parallel: bool = False, + sp: bool = False, cpu_offload: bool = False, activation_checkpointing: bool = False, custom_parallel_plan: str | None = None, + dtensor_v2: bool = False, ) -> PolicyConfig: return { "model_name": model_name, @@ -52,6 +49,7 @@ def create_test_config( "learning_rate": 5e-6, "logprob_batch_size": 1, "precision": "float32", + "offload_optimizer_for_logprob": False, "generation": { "backend": "hf", "temperature": 1.0, @@ -69,9 +67,10 @@ def create_test_config( }, }, "dtensor_cfg": { + **({"_v2": dtensor_v2} if dtensor_v2 else {}), "enabled": True, "cpu_offload": cpu_offload, - "sequence_parallel": sequence_parallel, + "sequence_parallel": sp, "activation_checkpointing": activation_checkpointing, "tensor_parallel_size": tp, "context_parallel_size": cp, @@ -107,17 +106,6 @@ def create_test_config( } -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check_for_all(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - - yield - - # Restore the original value - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - @pytest.fixture(scope="module") def two_gpu_virtual_cluster(): cluster_name = "test" @@ -144,9 +132,10 @@ def gc_collect(): @pytest.fixture -def policy_setup(two_gpu_virtual_cluster, tiny_llama_model_path): +def policy_setup(request, two_gpu_virtual_cluster, tiny_llama_model_path): """Setup and teardown for policy tests - creates a virtual cluster and policy.""" - config = create_test_config(tiny_llama_model_path) + use_v2 = request.param if hasattr(request, "param") else False + 
config = create_test_config(tiny_llama_model_path, dtensor_v2=use_v2) tokenizer = get_tokenizer(config["tokenizer"]) config["generation"] = configure_generation_config(config["generation"], tokenizer) @@ -160,7 +149,8 @@ def policy_setup(two_gpu_virtual_cluster, tiny_llama_model_path): @pytest.mark.hf_gated -@pytest.mark.timeout(180) +@pytest.mark.timeout(360) +@pytest.mark.parametrize("policy_setup", [True, False], indirect=True) def test_lm_policy_init(policy_setup): policy = policy_setup @@ -240,11 +230,30 @@ def test_lm_policy_init(policy_setup): @pytest.fixture def training_setup(request, two_gpu_virtual_cluster): """Setup and teardown specifically for training tests.""" + # Get the use_v2 parameter from the test function + use_v2 = getattr(request.function, "pytestmark", []) + use_v2_value = False + for mark in use_v2: + if ( + hasattr(mark, "args") + and len(mark.args) > 1 + and "use_v2" in str(mark.args[0]) + ): + for param_set in mark.args[1]: + if isinstance(param_set, bool): + use_v2_value = param_set + break + + # If multiple parametrize decorators, we need to check the node id + if hasattr(request, "node") and hasattr(request.node, "callspec"): + if "use_v2" in request.node.callspec.params: + use_v2_value = request.node.callspec.params["use_v2"] + ( model_fixture_name, tp, cp, - sequence_parallel, + sp, cpu_offload, activation_checkpointing, ) = request.param @@ -257,11 +266,17 @@ def training_setup(request, two_gpu_virtual_cluster): try: config = create_test_config( - model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing + model_name, + tp, + cp, + sp, + cpu_offload, + activation_checkpointing, + dtensor_v2=use_v2_value, ) tokenizer = get_tokenizer(config["tokenizer"]) print( - f"Creating training Policy with tp={tp}, cpu_offload={cpu_offload}, sequence_parallel={sequence_parallel}, activation_checkpointing={activation_checkpointing}..." 
+ f"Creating training Policy with tp={tp}, cpu_offload={cpu_offload}, sequence_parallel={sp}, activation_checkpointing={activation_checkpointing}..." ) policy = Policy( cluster=two_gpu_virtual_cluster, @@ -308,7 +323,8 @@ def training_setup(request, two_gpu_virtual_cluster): @pytest.mark.hf_gated -@pytest.mark.timeout(60) +@pytest.mark.timeout(360) +@pytest.mark.parametrize("use_v2", [True, False]) @pytest.mark.parametrize( "training_setup", [ @@ -340,10 +356,17 @@ def training_setup(request, two_gpu_virtual_cluster): ("tiny_gemma3_model_path", 1, 1, False, True, True), ("tiny_gemma3_model_path", 1, 1, True, True, True), # CP doesn't support gemma3 due to spda input has attent_mask != None. + # Nemotron-H doesn't support SP https://github.com/NVIDIA-NeMo/RL/issues/881 + # ("tiny_nemotron5_h_model_path", 1, 1, True, True, False), + # ("tiny_nemotron5_h_model_path", 1, 1, True, False, True), + # ("tiny_nemotron5_h_model_path", 1, 1, True, True, True), + ("tiny_nemotron5_h_model_path", 1, 1, False, False, False), + ("tiny_nemotron5_h_model_path", 1, 1, False, True, True), + # nemotron5_h doesn't support cp ], indirect=True, ) -def test_dtensor_worker_training(training_setup): +def test_dtensor_worker_training(use_v2, training_setup): def verify_loss_tensor(loss_tensor): assert not torch.isnan(loss_tensor).any(), "Loss should not be NaN" assert not torch.isinf(loss_tensor).any(), "Loss should not be Inf" @@ -377,15 +400,43 @@ def verify_loss_tensor(loss_tensor): # Verify loss changed between iterations (model parameters were updated) assert losses[0] > losses[-1], "Loss should decrease over training iterations" + # Verify the train function returns the performance metrics + + if policy.flops_tracker is not None: + assert "total_flops" in results and isinstance( + results["total_flops"], (int, float) + ), "training backend should report total_flops" + assert results["total_flops"] > 0, "total_flops should be positive" + assert "num_ranks" in results and 
isinstance(results["num_ranks"], int), ( + "training backend should report num_ranks" + ) + assert results["num_ranks"] > 0, "num_ranks should be positive" + + # we don't always require theoretical_tflops since the data about the GPU + # is not always available. + if "theoretical_tflops" in results: + assert isinstance(results["theoretical_tflops"], (int, float)), ( + "training backend should report theoretical_tflops" + ) + assert results["theoretical_tflops"] > 0, ( + "theoretical_tflops should be positive" + ) + @pytest.fixture def logprob_setup(request, two_gpu_virtual_cluster): """Setup and teardown specifically for training tests.""" + # Get the use_v2 parameter from the test function + use_v2_value = False + if hasattr(request, "node") and hasattr(request.node, "callspec"): + if "use_v2" in request.node.callspec.params: + use_v2_value = request.node.callspec.params["use_v2"] + ( model_fixture_name, tp, cp, - sequence_parallel, + sp, cpu_offload, activation_checkpointing, ) = request.param @@ -397,11 +448,17 @@ def logprob_setup(request, two_gpu_virtual_cluster): try: config = create_test_config( - model_name, tp, cp, sequence_parallel, cpu_offload, activation_checkpointing + model_name, + tp, + cp, + sp, + cpu_offload, + activation_checkpointing, + dtensor_v2=use_v2_value, ) tokenizer = get_tokenizer(config["tokenizer"]) print( - f"Creating logprob Policy with tp={tp}, cpu_offload={cpu_offload}, sequence_parallel={sequence_parallel}, activation_checkpointing={activation_checkpointing}..." + f"Creating logprob Policy with tp={tp}, cpu_offload={cpu_offload}, sequence_parallel={sp}, activation_checkpointing={activation_checkpointing}..." 
) policy = Policy( cluster=two_gpu_virtual_cluster, @@ -468,6 +525,7 @@ def logprob_setup(request, two_gpu_virtual_cluster): @pytest.mark.hf_gated @pytest.mark.timeout(360) +@pytest.mark.parametrize("use_v2", [True, False]) @pytest.mark.parametrize( "logprob_setup", [ @@ -492,7 +550,7 @@ def logprob_setup(request, two_gpu_virtual_cluster): ], indirect=True, ) -def test_dtensor_worker_logprob_tp2_or_cp2_matches_unsharded(logprob_setup): +def test_dtensor_worker_logprob_tp2_or_cp2_matches_unsharded(use_v2, logprob_setup): policy, data, logprobs = logprob_setup # Verify resources were created properly assert policy is not None, "Policy was not created properly" @@ -510,8 +568,9 @@ def test_dtensor_worker_logprob_tp2_or_cp2_matches_unsharded(logprob_setup): @pytest.mark.hf_gated +@pytest.mark.parametrize("use_v2", [True, False]) def test_dtensor_tp_and_tied_model_with_custom_parallel_plan( - two_gpu_virtual_cluster, tiny_llama_tied_model_path + use_v2, two_gpu_virtual_cluster, tiny_llama_tied_model_path ): """Test that DTensor with a tp > 1 and a tied model with a custom parallel plan works.""" from torch.distributed.tensor.parallel import ColwiseParallel @@ -525,10 +584,11 @@ def test_dtensor_tp_and_tied_model_with_custom_parallel_plan( model_name=tiny_llama_tied_model_path, tp=2, cp=1, - sequence_parallel=False, + sp=False, cpu_offload=False, activation_checkpointing=False, custom_parallel_plan=custom_parallel_plan, + dtensor_v2=use_v2, ) tokenizer = get_tokenizer(config["tokenizer"]) @@ -612,9 +672,14 @@ def test_dtensor_loss_independent_of_microbatch_size_two_gpus( "ratio_clip_max": 0.2, "ratio_clip_c": None, "reference_policy_kl_penalty": 0.1, + "reference_policy_kl_type": "k3", + "kl_input_clamp_value": 20.0, + "kl_output_clamp_value": 10.0, "disable_ppo_ratio": False, "use_on_policy_kl_approximation": False, "use_importance_sampling_correction": False, + "truncated_importance_sampling_ratio": None, + "sequence_level_importance_ratios": False, "token_level_loss": 
True, } ) @@ -654,3 +719,113 @@ def test_dtensor_loss_independent_of_microbatch_size_two_gpus( torch.testing.assert_close(mbs1_pg_loss, mbs2_pg_loss, rtol=1e-5, atol=1e-5) policy_mbs2.worker_group.shutdown() + + +@pytest.mark.hf_gated +@pytest.mark.timeout(300) +@pytest.mark.parametrize("use_v2", [True, False]) +def test_dtensor_v1_policy_flops_range_check( + tiny_llama_model_path, two_gpu_virtual_cluster, use_v2 +): + """Test that the returned FLOPS is within a reasonable range using dtensor backend. + + Performs 2 warmup iterations and measures FLOPS for the next 3 iterations. + """ + batch_size = 8 + seq_len = 128 + vocab_size = 32000 + + # Create dtensor v1 config with default settings + config = create_test_config(tiny_llama_model_path, dtensor_v2=use_v2) + + # Update config for FLOPS testing with larger batch and sequence length + config["train_global_batch_size"] = batch_size + config["train_micro_batch_size"] = ( + batch_size # Use full batch size for single microbatch + ) + + tokenizer = get_tokenizer(config["tokenizer"]) + config["generation"] = configure_generation_config(config["generation"], tokenizer) + + policy = Policy( + cluster=two_gpu_virtual_cluster, + config=config, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Create test data + torch.manual_seed(42) + input_ids = torch.randint(0, vocab_size, (batch_size, seq_len)) + attention_mask = torch.ones(batch_size, seq_len) + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + "labels": torch.randint(0, vocab_size, (batch_size, seq_len)), + "sample_mask": torch.ones(batch_size), + } + ) + + # Create loss function + loss_fn = SimpleLoss() + + try: + # Prepare for training + policy.prepare_for_training() + + # Perform 2 warmup iterations + print("Performing warmup iterations...") + for warmup_step in range(2): + results = policy.train(data, loss_fn) + + 
# Measure FLOPS on the third iteration + print("Measuring FLOPS on 3 iterations...") + time_begin = time.time() + for train_step in range(3): + results = policy.train(data, loss_fn) + runtime_sec = time.time() - time_begin + + # Check if FLOPS tracking is available + if policy.flops_tracker is not None: + assert "total_flops" in results, ( + "Training results should contain 'total_flops'" + ) + total_flops = results["total_flops"] + + assert isinstance(total_flops, (int, float)), ( + "total_flops should be numeric" + ) + assert total_flops > 0, "total_flops should be positive" + + total_tflops = total_flops / 1e12 / 3 + print(f"Total FLOPS: {total_flops:.2e} ({total_tflops:.4f} TFLOPS)") + + flop_count_total = total_flops * runtime_sec + assert 1e9 < flop_count_total < 5e10, ( + "Total FLOPS should be within 1e9 and 5e10" + ) + + if "theoretical_tflops" in results: + theoretical_tflops = results["theoretical_tflops"] + assert isinstance(theoretical_tflops, (int, float)), ( + "theoretical_tflops should be numeric" + ) + assert theoretical_tflops > 0, "theoretical_tflops should be positive" + + utilization = total_tflops / theoretical_tflops + print(f"Theoretical TFLOPS: {theoretical_tflops:.2f}") + print(f"Model utilization: {utilization * 100:.2f}%") + + assert utilization <= 1.0, ( + f"Model utilization {utilization * 100:.2f}% should not exceed 100%" + ) + else: + print("FLOPS tracker not available, skipping FLOPS range check") + pytest.skip("FLOPS tracker not supported for this model configuration") + + finally: + policy.shutdown() diff --git a/tests/unit/models/policy/test_dtensor_worker_v2.py b/tests/unit/models/policy/test_dtensor_worker_v2.py new file mode 100644 index 0000000000..daaf2ea5d2 --- /dev/null +++ b/tests/unit/models/policy/test_dtensor_worker_v2.py @@ -0,0 +1,244 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +import ray + +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.policy import PolicyConfig + + +def create_test_config( + model_name: str, + tp: int = 1, + cp: int = 1, + sp: bool = False, + cpu_offload: bool = False, + activation_checkpointing: bool = False, + custom_parallel_plan: str | None = None, + dtensor_v2: bool = False, +) -> PolicyConfig: + return { + "model_name": model_name, + "tokenizer": {"name": model_name}, + "generation_batch_size": 1, # Small batch size for testing + "train_global_batch_size": 4, + "train_micro_batch_size": 1, + "learning_rate": 5e-6, + "logprob_batch_size": 1, + "precision": "float32", + "offload_optimizer_for_logprob": False, + "generation": { + "backend": "hf", + "temperature": 1.0, + "max_new_tokens": 16, # Small number of tokens for testing + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + }, + "dtensor_cfg": { + **({"_v2": dtensor_v2} if dtensor_v2 else {}), + "enabled": True, + "cpu_offload": cpu_offload, + "sequence_parallel": sp, + "activation_checkpointing": activation_checkpointing, + "tensor_parallel_size": tp, + "context_parallel_size": cp, + "custom_parallel_plan": custom_parallel_plan, + }, + "dynamic_batching": { + "enabled": True, + "train_mb_tokens": 128, + 
"logprob_mb_tokens": 128, + "sequence_length_round": 4, + }, + "sequence_packing": { + "enabled": False, + }, + "optimizer": { + "name": "torch.optim.AdamW", + "kwargs": { + "lr": 5e-6, + "weight_decay": 0.01, + "betas": [0.9, 0.999], + "eps": 1e-8, + "foreach": False, + "fused": False, + }, + }, + "scheduler": { + "name": "torch.optim.lr_scheduler.CosineAnnealingLR", + "kwargs": { + "T_max": 100, + }, + }, + "max_grad_norm": 1.0, + } + + +@pytest.fixture(scope="module") +def two_gpu_virtual_cluster(): + cluster_name = "test" + print(f"Creating virtual cluster '{cluster_name}'...") + cluster = RayVirtualCluster( + name=cluster_name, + bundle_ct_per_node_list=[2], # Use tp bundles, one per GPU + use_gpus=True, + num_gpus_per_node=2, # Using tp GPUs + max_colocated_worker_groups=1, # Only one worker group + ) + yield cluster + print("Shutting down virtual cluster...") + cluster.shutdown() + + +from nemo_rl.algorithms.utils import get_tokenizer +from nemo_rl.models.policy.lm_policy import Policy + + +def compare_model_configs(config_v1: dict, config_v2: dict) -> list[str]: + """ + Compare two model configurations and return a list of discrepancies. + + Args: + config_v1: Model config from dtensor worker v1 + config_v2: Model config from dtensor worker v2 + + Returns: + List of discrepancy descriptions. Empty list if configs are equivalent. 
+ """ + discrepancies = [] + + def compare_dicts(d1, d2, path=""): + """Recursively compare two dictionaries.""" + all_keys = set(d1.keys()) | set(d2.keys()) + + for key in all_keys: + current_path = f"{path}.{key}" if path else key + + if key not in d1: + discrepancies.append(f"Key '{current_path}' missing in v1 config") + elif key not in d2: + discrepancies.append(f"Key '{current_path}' missing in v2 config") + else: + val1, val2 = d1[key], d2[key] + + if isinstance(val1, dict) and isinstance(val2, dict): + compare_dicts(val1, val2, current_path) + elif val1 != val2: + discrepancies.append( + f"Value mismatch at '{current_path}': v1={val1}, v2={val2}" + ) + + compare_dicts(config_v1, config_v2) + return discrepancies + + +@pytest.mark.hf_gated +@pytest.mark.parametrize( + "model_fixture_name,tp,cp,sp,cpu_offload,activation_checkpointing", + [ + # TP=2, CP=1 + ("tiny_qwen2_model_path", 2, 1, False, False, False), + ("tiny_llama_model_path", 2, 1, False, False, False), + ("tiny_qwen3_model_path", 2, 1, False, False, False), + ("tiny_gemma3_model_path", 2, 1, False, False, False), + # TP=1, CP=2 + ("tiny_qwen2_model_path", 1, 2, False, False, False), + ("tiny_llama_model_path", 1, 2, False, False, False), + ("tiny_qwen3_model_path", 1, 2, False, False, False), + ], +) +def test_dtensor_worker_v1_v2_model_config_equivalence( + request, + two_gpu_virtual_cluster, # noqa: F811 + model_fixture_name, + tp, + cp, + sp, + cpu_offload, + activation_checkpointing, +): + """Test that dtensor worker v1 and v2 produce equivalent model configurations.""" + # Get the actual model path from the fixture name + model_name = request.getfixturevalue(model_fixture_name) + # Create v1 configuration + config_v1 = create_test_config( + model_name=model_name, + tp=tp, + cp=cp, + sp=sp, + cpu_offload=cpu_offload, + activation_checkpointing=activation_checkpointing, + dtensor_v2=False, # Use v1 worker + ) + # Create and test v1 policy first + print("Creating policy with v1 worker...") + 
policy_v1 = Policy( + tokenizer=get_tokenizer(config_v1["tokenizer"]), + config=config_v1, + init_optimizer=False, + init_reference_model=False, + cluster=two_gpu_virtual_cluster, + name_prefix="lm_policy_v1", + ) + + model_config_v1 = ray.get( + policy_v1.worker_group.workers[0].return_model_config.remote() + ) + policy_v1.shutdown() + + # Create v2 configuration + config_v2 = create_test_config( + model_name=model_name, + tp=tp, + cp=cp, + sp=sp, + cpu_offload=cpu_offload, + activation_checkpointing=activation_checkpointing, + dtensor_v2=True, # Use v2 worker + ) + policy_v2 = Policy( + tokenizer=get_tokenizer(config_v2["tokenizer"]), + config=config_v2, + init_optimizer=False, + init_reference_model=False, + cluster=two_gpu_virtual_cluster, + name_prefix="lm_policy_v2", + ) + + model_config_v2 = ray.get( + policy_v2.worker_group.workers[0].return_model_config.remote() + ) + policy_v2.shutdown() + + config_v1_dict = vars(model_config_v1) + config_v2_dict = vars(model_config_v2) + config_v1_dict.pop("nemo_version", None) + config_v2_dict.pop("nemo_version", None) + config_v1_dict.pop("pad_token_id", None) + config_v2_dict.pop("pad_token_id", None) + + discrepancies = compare_model_configs(config_v1_dict, config_v2_dict) + assert not discrepancies, ( + f"Model configurations differ between v1 and v2 approaches for {model_name}" + ) diff --git a/tests/unit/models/policy/test_megatron_worker.py b/tests/unit/models/policy/test_megatron_worker.py index 38607ba59f..33fe4f35a0 100644 --- a/tests/unit/models/policy/test_megatron_worker.py +++ b/tests/unit/models/policy/test_megatron_worker.py @@ -13,15 +13,21 @@ # limitations under the License. 
import os import tempfile +import time +from typing import Optional +import numpy as np import pytest +import ray import torch -# Define a custom marker for model configuration tests -pytestmark = pytest.mark.modelconfig - from nemo_rl.algorithms.interfaces import LossFunction -from nemo_rl.algorithms.loss_functions import ClippedPGLossFn, DPOLossFn, NLLLoss +from nemo_rl.algorithms.loss_functions import ( + ClippedPGLossConfig, + ClippedPGLossFn, + DPOLossFn, + NLLLoss, +) from nemo_rl.algorithms.utils import get_tokenizer from nemo_rl.distributed.batched_data_dict import BatchedDataDict from nemo_rl.distributed.virtual_cluster import RayVirtualCluster @@ -30,6 +36,22 @@ from nemo_rl.models.policy.lm_policy import Policy from tests.unit.test_utils import SimpleLoss +basic_pg_loss_test_config: ClippedPGLossConfig = { + "ratio_clip_min": 0.2, + "ratio_clip_max": 0.2, + "ratio_clip_c": None, + "reference_policy_kl_penalty": 0.1, + "reference_policy_kl_type": "k3", + "kl_input_clamp_value": 20.0, + "kl_output_clamp_value": 10.0, + "disable_ppo_ratio": False, + "use_on_policy_kl_approximation": False, + "use_importance_sampling_correction": False, + "truncated_importance_sampling_ratio": None, + "sequence_level_importance_ratios": False, + "token_level_loss": True, +} + def create_megatron_test_config( model_name: str, @@ -40,6 +62,8 @@ def create_megatron_test_config( generation_backend: str = "megatron", sequence_parallel: bool = False, converter_type: str = "LlamaForCausalLM", + logprob_chunk_size: Optional[int] = None, + defer_fp32_logits: Optional[bool] = None, ) -> PolicyConfig: """Create a test config for Megatron policy worker.""" return { @@ -50,7 +74,9 @@ def create_megatron_test_config( "train_micro_batch_size": 2, "learning_rate": 5e-6, "logprob_batch_size": 2, + "logprob_chunk_size": logprob_chunk_size, "precision": precision, + "offload_optimizer_for_logprob": False, "generation": { "backend": generation_backend, "temperature": 1.0, @@ -94,7 +120,11 @@ 
def create_megatron_test_config( "moe_router_dtype": "fp64", "moe_router_load_balancing_type": "none", "moe_router_bias_update_rate": 0.0, + "moe_permute_fusion": False, "apply_rope_fusion": True, + "bias_activation_fusion": True, + "defer_fp32_logits": defer_fp32_logits, + "train_iters": 100, # Required for Megatron training "optimizer": { "optimizer": "adam", "lr": 5.0e-6, @@ -109,6 +139,8 @@ def create_megatron_test_config( "use_distributed_optimizer": True, "use_precision_aware_optimizer": True, "clip_grad": 1.0, + "optimizer_cpu_offload": False, + "optimizer_offload_fraction": 0.0, }, "scheduler": { "start_weight_decay": 0.01, @@ -123,9 +155,14 @@ def create_megatron_test_config( "grad_reduce_in_fp32": False, "overlap_grad_reduce": True, "overlap_param_gather": False, - "average_in_collective": True, "data_parallel_sharding_strategy": "optim_grads_params", }, + "fp8_cfg": { + "enabled": False, + "fp8": "hybrid", + "fp8_recipe": "tensorwise", + "fp8_param": True, + }, }, "optimizer": None, # Remove default FSDP optimizer "scheduler": None, # Remove default scheduler @@ -133,14 +170,6 @@ def create_megatron_test_config( } -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check_for_all(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - yield - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - @pytest.fixture(scope="function") def gc_collect(): """Helper function to force garbage collection after a test""" @@ -338,6 +367,7 @@ def training_setup(request): {"activation_checkpointing": True}, ), (2, 2, 1, "tiny_llama_model_path", {"sequence_parallel": True}), + (2, 2, 1, "tiny_llama_model_path", {"precision": "bfloat16", "fp8": "hybrid"}), ], indirect=True, ids=[ @@ -348,6 +378,7 @@ def training_setup(request): "2gpu_dp2_llama_bf16", "2gpu_dp2_llama_ac", "2gpu_tp2_llama_sp", + "2gpu_tp2_llama_fp8", ], ) def test_megatron_policy_training(training_setup): @@ -387,6 
+418,26 @@ def verify_loss_tensor(loss_tensor): # Verify loss changed between iterations (model parameters were updated) assert losses[0] > losses[-1], "Loss should decrease over training iterations" + if policy.flops_tracker is not None: + assert "total_flops" in results and isinstance( + results["total_flops"], (int, float) + ), "training backend should report total_flops" + assert results["total_flops"] > 0, "total_flops should be positive" + assert "num_ranks" in results and isinstance(results["num_ranks"], int), ( + "training backend should report num_ranks" + ) + assert results["num_ranks"] > 0, "num_ranks should be positive" + + # we don't always require theoretical_tflops since the data about the GPU + # is not always available. + if "theoretical_tflops" in results: + assert isinstance(results["theoretical_tflops"], (int, float)), ( + "training backend should report theoretical_tflops" + ) + assert results["theoretical_tflops"] > 0, ( + "theoretical_tflops should be positive" + ) + @pytest.fixture def generation_setup(request, tiny_llama_model_path): @@ -487,7 +538,7 @@ def generation_setup(request, tiny_llama_model_path): cluster.shutdown() -@pytest.mark.skip(reason="Skipping megatorn generation tests for now") +@pytest.mark.skip(reason="Skipping megatron generation tests for now") @pytest.mark.timeout(240) @pytest.mark.parametrize( "generation_setup", @@ -542,9 +593,23 @@ def logprob_setup(request): """Setup and teardown specifically for logprob tests.""" # Parse parameters: (num_gpus, tp, pp, model_fixture_name) if hasattr(request, "param") and request.param is not None: - num_gpus, tp, pp, model_fixture_name = request.param + ( + num_gpus, + tp, + pp, + logprob_chunk_size, + defer_fp32_logits, + model_fixture_name, + ) = request.param else: - num_gpus, tp, pp, model_fixture_name = 2, 1, 1, "tiny_llama_model_path" + ( + num_gpus, + tp, + pp, + logprob_chunk_size, + defer_fp32_logits, + model_fixture_name, + ) = (2, 1, 1, None, None, 
"tiny_llama_model_path") # Get the actual model path from the requested fixture model_name = request.getfixturevalue(model_fixture_name) @@ -579,6 +644,8 @@ def logprob_setup(request): tp=tp, pp=pp, converter_type=converter_type, + logprob_chunk_size=logprob_chunk_size, + defer_fp32_logits=defer_fp32_logits, ) tokenizer = get_tokenizer(config["tokenizer"]) config["generation"] = configure_generation_config( @@ -627,14 +694,35 @@ def logprob_setup(request): @pytest.mark.parametrize( "logprob_setup", [ - # (num_gpus, tp, pp, model_fixture_name) - (2, 1, 1, "tiny_llama_model_path"), - (2, 2, 1, "tiny_llama_model_path"), - (2, 1, 1, "tiny_qwen2_model_path"), - (2, 2, 1, "tiny_qwen2_model_path"), + # (num_gpus, tp, pp, chunk sz, defer fp32, model_fixture_name) + (2, 1, 1, None, None, "tiny_llama_model_path"), + (2, 2, 1, None, None, "tiny_llama_model_path"), + (2, 1, 1, None, None, "tiny_qwen2_model_path"), + (2, 2, 1, None, None, "tiny_qwen2_model_path"), + (2, 1, 1, None, True, "tiny_llama_model_path"), + (2, 2, 1, None, True, "tiny_llama_model_path"), + (2, 1, 1, None, True, "tiny_qwen2_model_path"), + (2, 2, 1, None, True, "tiny_qwen2_model_path"), + (2, 1, 1, 16, True, "tiny_llama_model_path"), + (2, 2, 1, 16, True, "tiny_llama_model_path"), + (2, 1, 1, 16, True, "tiny_qwen2_model_path"), + (2, 2, 1, 16, True, "tiny_qwen2_model_path"), ], indirect=True, - ids=["2gpu_dp2_llama", "2gpu_tp2_llama", "2gpu_dp2_qwen2", "2gpu_tp2_qwen2"], + ids=[ + "2gpu_dp2_llama", + "2gpu_tp2_llama", + "2gpu_dp2_qwen2", + "2gpu_tp2_qwen2", + "2gpu_dp2_deferfp32_llama", + "2gpu_tp2_deferfp32_llama", + "2gpu_dp2_deferfp32_qwen2", + "2gpu_tp2_deferfp32_qwen2", + "2gpu_dp2_chunked_deferfp32_llama", + "2gpu_tp2_chunked_deferfp32_llama", + "2gpu_dp2_chunked_deferfp32_qwen2", + "2gpu_tp2_chunked_deferfp32_qwen2", + ], ) def test_megatron_policy_logprobs(logprob_setup): """Test Megatron policy logprob computation.""" @@ -651,6 +739,7 @@ def test_megatron_policy_logprobs(logprob_setup): # Basic 
validation assert isinstance(policy_logprobs, torch.Tensor), "Logprobs should be a tensor" + assert policy_logprobs.dtype == torch.float32 assert policy_logprobs.shape == data.get("input_ids").shape, ( f"Logprobs shape {policy_logprobs.shape} should match input shape {data.get('input_ids').shape}" ) @@ -722,18 +811,7 @@ def test_megatron_loss_independent_of_microbatch_size(tiny_llama_model_path): # Test loss functions nll_loss_fn = NLLLoss() - pg_loss_fn = ClippedPGLossFn( - { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.1, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } - ) + pg_loss_fn = ClippedPGLossFn(basic_pg_loss_test_config) policy1.prepare_for_training() mbs1_nll_results = policy1.train(data, nll_loss_fn) @@ -782,6 +860,96 @@ def test_megatron_loss_independent_of_microbatch_size(tiny_llama_model_path): cluster2.shutdown() +@pytest.mark.timeout(240) +@pytest.mark.hf_gated +def test_megatron_grad_norm_invariant_to_number_of_microbatches(tiny_llama_model_path): + """Verify grad_norm is invariant to number of microbatches.""" + num_gpus = 2 + global_batch_size = 4 + seq_len = 64 + vocab_size = 32000 + + torch.manual_seed(123) + input_ids = torch.randint(0, vocab_size, (global_batch_size, seq_len)) + attention_mask = torch.ones(global_batch_size, seq_len) + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + "token_mask": torch.triu( + torch.ones(global_batch_size, seq_len), diagonal=1 + ), + "sample_mask": torch.ones((global_batch_size,)), + "labels": torch.randint(0, vocab_size, (global_batch_size, seq_len)), + } + ) + + tokenizer = get_tokenizer({"name": tiny_llama_model_path}) + nll_loss_fn = NLLLoss() + + cluster1 = RayVirtualCluster( + 
name="test-gradnorm-mbs1", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + # mbs=1, num_microbatches=4 + config1 = create_megatron_test_config(tiny_llama_model_path) + config1["train_global_batch_size"] = global_batch_size + config1["train_micro_batch_size"] = 1 + config1["generation"] = configure_generation_config( + config1["generation"], tokenizer + ) + + policy1 = Policy( + cluster=cluster1, + config=config1, + tokenizer=tokenizer, + init_reference_model=False, + ) + policy1.prepare_for_training() + res1 = policy1.train(data, nll_loss_fn, gbs=global_batch_size, mbs=1) + grad_norm_1 = res1["grad_norm"].cpu() + policy1.shutdown() + cluster1.shutdown() + + cluster2 = RayVirtualCluster( + name="test-gradnorm-mbs2", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + # mbs=2, num_microbatches=2 + config2 = create_megatron_test_config(tiny_llama_model_path) + config2["train_global_batch_size"] = global_batch_size + config2["train_micro_batch_size"] = 2 + config2["generation"] = configure_generation_config( + config2["generation"], tokenizer + ) + + policy2 = Policy( + cluster=cluster2, + config=config2, + tokenizer=tokenizer, + init_reference_model=False, + ) + policy2.prepare_for_training() + res2 = policy2.train(data, nll_loss_fn, gbs=global_batch_size, mbs=2) + grad_norm_2 = res2["grad_norm"].cpu() + + torch.testing.assert_close(grad_norm_1, grad_norm_2, rtol=1e-5, atol=1e-5) + + policy2.shutdown() + cluster2.shutdown() + + @pytest.mark.timeout(300) @pytest.mark.hf_gated def test_megatron_reference_policy_functionality(tiny_llama_model_path): @@ -1255,6 +1423,363 @@ def test_megatron_dpo_training(tiny_llama_model_path): cluster.shutdown() +@pytest.fixture +def topk_setup(request): + """Setup and teardown specifically for top-k logits tests.""" + # Parse parameters: (num_gpus, tp, pp, logprob_chunk_size, 
defer_fp32_logits, model_fixture_name) + if hasattr(request, "param") and request.param is not None: + ( + num_gpus, + tp, + pp, + logprob_chunk_size, + defer_fp32_logits, + model_fixture_name, + ) = request.param + else: + ( + num_gpus, + tp, + pp, + logprob_chunk_size, + defer_fp32_logits, + model_fixture_name, + ) = (2, 1, 1, None, None, "tiny_llama_model_path") + + # Get the actual model path from the requested fixture + model_name = request.getfixturevalue(model_fixture_name) + + policy = None + cluster = None + data = None + + try: + cluster_name = f"test-megatron-topk-{num_gpus}gpu-tp{tp}-pp{pp}" + print( + f"Creating topk cluster '{cluster_name}' for {num_gpus} GPUs (TP={tp}, PP={pp})" + ) + + cluster = RayVirtualCluster( + name=cluster_name, + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + # Determine converter type based on model + converter_type = "LlamaForCausalLM" + if "qwen" in model_name.lower(): + converter_type = "Qwen2ForCausalLM" + elif "gemma" in model_name.lower(): + converter_type = "GemmaForCausalLM" + + config = create_megatron_test_config( + model_name=model_name, + tp=tp, + pp=pp, + converter_type=converter_type, + logprob_chunk_size=logprob_chunk_size, + defer_fp32_logits=defer_fp32_logits, + ) + tokenizer = get_tokenizer(config["tokenizer"]) + config["generation"] = configure_generation_config( + config["generation"], tokenizer + ) + + print("Creating Megatron topk Policy...") + policy = Policy( + cluster=cluster, + config=config, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Create test data + print("Creating test batch...") + torch.manual_seed(77) + + input_ids = torch.randint(0, 32000, (4, 64)) # 4 sequences, each of length 64 + attention_mask = torch.ones(4, 64) + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": 
attention_mask, + } + ) + + yield policy, cluster, data + + except Exception as e: + print(f"Error during topk setup: {e}") + pytest.skip(f"Topk setup failed: {e}") + finally: + print("Cleaning up topk resources") + if policy: + policy.shutdown() + if cluster: + cluster.shutdown() + + +@pytest.mark.timeout(180) +@pytest.mark.hf_gated +@pytest.mark.parametrize( + "topk_setup", + [ + # (num_gpus, tp, pp, chunk sz, defer fp32, model_fixture_name) + (2, 1, 1, None, None, "tiny_llama_model_path"), + (2, 2, 1, None, None, "tiny_llama_model_path"), + (2, 1, 1, None, None, "tiny_qwen2_model_path"), + (2, 2, 1, None, None, "tiny_qwen2_model_path"), + (2, 1, 1, None, True, "tiny_llama_model_path"), + (2, 2, 1, None, True, "tiny_llama_model_path"), + (2, 1, 1, None, True, "tiny_qwen2_model_path"), + (2, 2, 1, None, True, "tiny_qwen2_model_path"), + (2, 1, 1, 16, True, "tiny_llama_model_path"), + (2, 2, 1, 16, True, "tiny_llama_model_path"), + (2, 1, 1, 16, True, "tiny_qwen2_model_path"), + (2, 2, 1, 16, True, "tiny_qwen2_model_path"), + ], + indirect=True, + ids=[ + "2gpu_dp2_llama", + "2gpu_tp2_llama", + "2gpu_dp2_qwen2", + "2gpu_tp2_qwen2", + "2gpu_dp2_deferfp32_llama", + "2gpu_tp2_deferfp32_llama", + "2gpu_dp2_deferfp32_qwen2", + "2gpu_tp2_deferfp32_qwen2", + "2gpu_dp2_chunked_deferfp32_llama", + "2gpu_tp2_chunked_deferfp32_llama", + "2gpu_dp2_chunked_deferfp32_qwen2", + "2gpu_tp2_chunked_deferfp32_qwen2", + ], +) +def test_megatron_policy_topk_logits(topk_setup): + """Test Megatron policy top-k logits computation.""" + policy, cluster, data = topk_setup + + # Verify resources were created properly + assert policy is not None, "Policy was not created properly" + assert data is not None, "Test data was not created properly" + + # Generate top-k logits + print("\nGenerating top-k logits...") + policy.prepare_for_lp_inference() + k = 5 + outputs = policy.get_topk_logits(data, k=k) + + # Basic validation + assert "topk_logits" in outputs and "topk_indices" in outputs, ( + 
"Top-k outputs should contain both 'topk_logits' and 'topk_indices'" + ) + topk_logits = outputs["topk_logits"] + topk_indices = outputs["topk_indices"] + + assert isinstance(topk_logits, torch.Tensor) + assert isinstance(topk_indices, torch.Tensor) + assert topk_logits.dtype == torch.float32 + assert topk_indices.dtype in (torch.int32, torch.int64, torch.long) + + # Shape checks + B, S = data.get("input_ids").shape + assert topk_logits.shape == (B, S, k) + assert topk_indices.shape == (B, S, k) + + # Mask invalid positions and check for NaN/Inf + valid_mask = ( + data.get("attention_mask") + .unsqueeze(-1) + .bool() + .expand(-1, -1, topk_logits.shape[-1]) + ) + valid_logits = topk_logits[valid_mask] + assert not torch.isnan(valid_logits).any(), "Top-k logits should not contain NaN" + assert not torch.isinf(valid_logits).any(), "Top-k logits should not contain Inf" + + # Check descending order within top-k for valid positions + if S > 1: + diffs = topk_logits[..., :-1] - topk_logits[..., 1:] + valid_mask_diffs = ( + data.get("attention_mask") + .unsqueeze(-1) + .bool() + .expand(-1, -1, topk_logits.shape[-1] - 1) + ) + diffs = diffs[valid_mask_diffs] + assert (diffs >= -1e-6).all(), "Top-k logits should be non-increasing across k" + + +@pytest.mark.hf_gated +@pytest.mark.timeout(300) +def test_megatron_context_parallel_topk_agreement(tiny_qwen2_model_path): + """Test that CP and non-CP models produce identical top-k logits with sequence packing enabled.""" + num_gpus = 2 + batch_size = 4 + seq_len = 64 + + # Create test data with varying sequence lengths to test sequence packing + torch.manual_seed(123) + input_ids = torch.arange(seq_len * batch_size, device="cuda").reshape( + batch_size, seq_len + ) + input_lengths = torch.tensor([31, 21, 29, 56], dtype=torch.int32) + attention_mask = torch.zeros(batch_size, seq_len) + for i, length in enumerate(input_lengths): + attention_mask[i, :length] = 1 + + data = BatchedDataDict( + { + "input_ids": input_ids, + 
"input_lengths": input_lengths, + "attention_mask": attention_mask, + } + ) + + k = 5 + + # Test 1: Non-CP model (context_parallel_size=1) with sequence packing + print( + "=== Testing Non-CP model (context_parallel_size=1) with sequence packing for top-k ===" + ) + cluster_no_cp = RayVirtualCluster( + name="test-no-cp-packing-topk", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config_no_cp = create_megatron_test_config( + tiny_qwen2_model_path, tp=1, pp=1, precision="bfloat16" + ) + # Ensure context parallel is disabled + config_no_cp["megatron_cfg"]["context_parallel_size"] = 1 + + # Enable sequence packing + config_no_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + + tokenizer = get_tokenizer(config_no_cp["tokenizer"]) + config_no_cp["generation"] = configure_generation_config( + config_no_cp["generation"], tokenizer + ) + + policy_no_cp = Policy( + cluster=cluster_no_cp, + config=config_no_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Get top-k from non-CP model with sequence packing + policy_no_cp.prepare_for_lp_inference() + out_no_cp = policy_no_cp.get_topk_logits(data, k=k) + logits_no_cp = out_no_cp["topk_logits"] * attention_mask.unsqueeze(-1) + indices_no_cp = out_no_cp["topk_indices"] + print(f"Non-CP topk logits shape: {logits_no_cp.shape}") + + # Cleanup non-CP resources and run without packing + policy_no_cp.shutdown() + config_no_cp_no_packing = config_no_cp.copy() + config_no_cp_no_packing["sequence_packing"] = {"enabled": False} + policy_no_cp_no_packing = Policy( + cluster=cluster_no_cp, + config=config_no_cp_no_packing, + tokenizer=tokenizer, + init_reference_model=False, + ) + policy_no_cp_no_packing.prepare_for_lp_inference() + out_no_cp_np = policy_no_cp_no_packing.get_topk_logits(data, k=k) + logits_no_cp_np = 
out_no_cp_np["topk_logits"] * attention_mask.unsqueeze(-1) + indices_no_cp_np = out_no_cp_np["topk_indices"] + print(f"Non-CP (no packing) topk logits shape: {logits_no_cp_np.shape}") + cluster_no_cp.shutdown() + + # Compare non-CP packing vs non-packing + print("=== Comparing non-CP packing vs non-packing top-k ===") + assert logits_no_cp.shape == logits_no_cp_np.shape + assert indices_no_cp.shape == indices_no_cp_np.shape + torch.testing.assert_close(logits_no_cp, logits_no_cp_np, rtol=1e-3, atol=1e-2) + valid_mask = ( + attention_mask.bool().unsqueeze(-1).expand(-1, -1, indices_no_cp.shape[-1]) + ) + assert torch.equal(indices_no_cp[valid_mask], indices_no_cp_np[valid_mask]), ( + "Top-k indices should match between packing and non-packing" + ) + + # Test 2: CP model (context_parallel_size=2) with sequence packing + print( + "=== Testing CP model (context_parallel_size=2) with sequence packing for top-k ===" + ) + cluster_cp = RayVirtualCluster( + name="test-cp-packing-topk", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config_cp = create_megatron_test_config( + tiny_qwen2_model_path, tp=1, pp=1, precision="bfloat16" + ) + # Enable context parallel + config_cp["megatron_cfg"]["context_parallel_size"] = 2 + + # Enable sequence packing + config_cp["sequence_packing"] = { + "enabled": True, + "train_mb_tokens": seq_len, + "logprob_mb_tokens": seq_len, + "algorithm": "modified_first_fit_decreasing", + } + config_cp["generation"] = configure_generation_config( + config_cp["generation"], tokenizer + ) + + policy_cp = Policy( + cluster=cluster_cp, + config=config_cp, + tokenizer=tokenizer, + init_reference_model=False, + ) + policy_cp.prepare_for_lp_inference() + out_cp = policy_cp.get_topk_logits(data, k=k) + logits_cp = out_cp["topk_logits"] * attention_mask.unsqueeze(-1) + indices_cp = out_cp["topk_indices"] + + # Cleanup CP resources + policy_cp.shutdown() + cluster_cp.shutdown() + + # 
Compare CP vs non-CP (no packing) + print("=== Comparing CP vs non-CP (no packing) top-k ===") + assert logits_no_cp_np.shape == logits_cp.shape + assert indices_no_cp_np.shape == indices_cp.shape + assert not torch.isnan(logits_cp).any() + assert not torch.isinf(logits_cp).any() + torch.testing.assert_close(logits_no_cp_np, logits_cp, rtol=1e-3, atol=1e-2) + # since there are close logits, we only check the index match ratio + valid_mask_idx = ( + attention_mask.bool().unsqueeze(-1).expand(-1, -1, indices_cp.shape[-1]) + ) + cp_idx_flat = indices_cp[valid_mask_idx] + nocp_idx_flat = indices_no_cp_np[valid_mask_idx] + match_ratio = (cp_idx_flat == nocp_idx_flat).float().mean().item() + print(f"Top-k index match ratio (CP vs non-CP): {match_ratio:.4f}") + assert match_ratio >= 0.95, ( + f"Top-k index match ratio too low: {match_ratio:.4f} (< 0.95)" + ) + + @pytest.mark.timeout(300) @pytest.mark.hf_gated def test_megatron_sft_training(tiny_llama_model_path): @@ -1629,18 +2154,7 @@ def test_megatron_context_parallel_training_agreement(tiny_llama_model_path): ) # Create ClippedPG loss function - loss_fn = ClippedPGLossFn( - { - "ratio_clip_min": 0.2, - "ratio_clip_max": 0.2, - "ratio_clip_c": None, - "reference_policy_kl_penalty": 0.1, - "disable_ppo_ratio": False, - "use_on_policy_kl_approximation": False, - "use_importance_sampling_correction": False, - "token_level_loss": True, - } - ) + loss_fn = ClippedPGLossFn(basic_pg_loss_test_config) # Train non-CP model policy_no_cp.prepare_for_training() @@ -1756,3 +2270,309 @@ def test_megatron_context_parallel_training_agreement(tiny_llama_model_path): print( "✓ SUCCESS: CP and non-CP models produce consistent training results with ClippedPG loss and sequence packing" ) + + +@pytest.mark.hf_gated +@pytest.mark.timeout(300) +def test_megatron_gradient_norm_consistency_across_parallelism(tiny_llama_model_path): + """Test that gradient norms are consistent across different TP and DP configurations. 
+ + This test validates that the same model produces identical gradient norms + regardless of tensor parallelism (TP) and data parallelism (DP) settings. + """ + batch_size = 8 + seq_len = 64 + vocab_size = 32000 + + # Create reproducible test data + torch.manual_seed(42) + input_ids = torch.randint(0, vocab_size, (batch_size, seq_len)) + attention_mask = torch.ones(batch_size, seq_len) + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + labels = torch.randint(0, vocab_size, (batch_size, seq_len)) + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + "labels": labels, + "sample_mask": torch.ones(batch_size), + "token_mask": torch.ones_like(input_ids), + } + ) + + # Test configurations: (num_gpus, tp, pp, description) + test_configs = [ + (1, 1, 1, "DP1TP1"), + (2, 1, 1, "DP2"), # Data parallel with 2 GPUs + (2, 2, 1, "TP2"), # Tensor parallel with 2 GPUs + ] + + grad_norms = {} + losses = {} + + for num_gpus, tp, pp, desc in test_configs: + print( + f"\n=== Testing {desc} configuration (GPUs={num_gpus}, TP={tp}, PP={pp}) ===" + ) + + cluster = RayVirtualCluster( + name=f"test-grad-norm-{desc.lower()}", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + config = create_megatron_test_config( + model_name=tiny_llama_model_path, + tp=tp, + pp=pp, + precision="float32", # Use float32 for more stable gradient comparisons + ) + + tokenizer = get_tokenizer(config["tokenizer"]) + config["generation"] = configure_generation_config( + config["generation"], tokenizer + ) + + policy = Policy( + cluster=cluster, + config=config, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Use SimpleLoss for consistent comparison + loss_fn = NLLLoss() + + try: + # Prepare for training + policy.prepare_for_training() + + # Perform one forward/backward step + print(f"Performing forward/backward pass for {desc}...") + 
results = policy.train(data, loss_fn) + + # Extract metrics + loss_tensor = results["loss"] + grad_norm = results["grad_norm"] + + # Verify loss is valid + assert not torch.isnan(loss_tensor).any(), ( + f"Loss should not be NaN for {desc}" + ) + assert not torch.isinf(loss_tensor).any(), ( + f"Loss should not be Inf for {desc}" + ) + + # Store results for comparison + grad_norms[desc] = grad_norm + losses[desc] = loss_tensor.cpu().numpy() + + print(f"{desc} - Loss: {loss_tensor}") + print(f"{desc} - Grad norm: {grad_norm}") + + # Check tensor parallel attributes on model parameters + print(f"Checking tensor parallel attributes for {desc}...") + tp_check_futures = policy.worker_group.run_all_workers_single_data( + "check_tensor_parallel_attributes" + ) + tp_check_results = [ray.get(future) for future in tp_check_futures] + + # Analyze the first worker's results (all workers should have the same structure) + tp_info = tp_check_results[0] + + print(f"{desc} - TP size: {tp_info['tp_size']}") + print(f"{desc} - Total params: {tp_info['total_params']}") + print(f"{desc} - TP params: {len(tp_info['tp_params'])}") + print(f"{desc} - Non-TP params: {len(tp_info['non_tp_params'])}") + + # Validate tensor parallel attributes + expected_tp_size = tp + assert tp_info["tp_size"] == expected_tp_size, ( + f"Expected TP size {expected_tp_size}, got {tp_info['tp_size']}" + ) + + if tp > 1: + tp_sharded_names = [item["name"] for item in tp_info["tp_params"]] + # When tensor parallelism is enabled, we should have some TP parameters + assert "module.embedding.word_embeddings.weight" in tp_sharded_names, ( + f"Expected module.embedding.word_embeddings.weight to be TP-sharded when TP={tp}" + ) + + finally: + policy.shutdown() + cluster.shutdown() + + # Compare gradient norms across configurations + print("\n=== Comparing gradient norms across configurations ===") + + # Get reference values from DP2 configuration + # NOTE: even if TP2 config passes these tests, it doesn't necessarily 
imply + # there are no bugs. That's why we also check that TP attributes are set correctly above + reference_config = "DP1TP1" + reference_grad_norm = grad_norms[reference_config] + reference_loss = losses[reference_config] + + for config_name, grad_norm in grad_norms.items(): + if config_name == reference_config: + continue + + if not isinstance(grad_norm, list): + grad_norm = [grad_norm] + + print(f"\nComparing {config_name} with {reference_config}:") + print(f" {reference_config} grad norm: {reference_grad_norm}") + print(f" {config_name} grad norm: {grad_norm}") + + # Compare gradient norms + if not isinstance(grad_norm, list): + grad_norm = [grad_norm] + reference_grad_norm = [reference_grad_norm] + if isinstance(grad_norm, list) and isinstance(reference_grad_norm, list): + # Handle case where grad_norm is a list (multiple microbatches) + assert len(grad_norm) == len(reference_grad_norm), ( + f"Number of gradient norm values should match: {len(grad_norm)} vs {len(reference_grad_norm)}" + ) + + for i, (gn, ref_gn) in enumerate(zip(grad_norm, reference_grad_norm)): + grad_diff = abs(gn - ref_gn) + relative_diff = grad_diff / (ref_gn + 1e-8) + print( + f" Microbatch {i}: {ref_gn} vs {gn}, diff={grad_diff.item():.6f}, rel_diff={relative_diff.item():.6f}" + ) + + # Allow small differences due to floating point precision and parallelization + assert relative_diff < 0.01 or grad_diff < 1e-6, ( + f"Gradient norm difference too large for microbatch {i}: " + f"{ref_gn} vs {gn} (diff={grad_diff.item():.6f}, rel_diff={relative_diff.item():.6f})" + ) + + # Compare losses (should also be identical for same computation) + loss_diff = np.max(np.abs(reference_loss - losses[config_name])) + relative_loss_diff = loss_diff / (np.mean(np.abs(reference_loss)) + 1e-8) + print( + f" Loss diff: {loss_diff:.6f}, relative loss diff: {relative_loss_diff:.6f}" + ) + + # Allow small differences in loss as well + assert relative_loss_diff < 0.01 or loss_diff < 1e-6, ( + f"Loss difference 
too large: " + f"max diff={loss_diff:.6f}, rel_diff={relative_loss_diff:.6f}" + ) + + print( + "\n✓ SUCCESS: Gradient norms are consistent across all parallelization configurations!" + ) + + +@pytest.mark.hf_gated +@pytest.mark.timeout(300) +def test_megatron_policy_flops_range_check(tiny_llama_model_path): + """Test that the returned FLOPS is within a reasonable range using default config. + + Performs 2 warmup iterations and measures FLOPS on the third iteration. + """ + num_gpus = 1 + batch_size = 8 + seq_len = 128 + vocab_size = 32000 + + # Create cluster and policy with default config + cluster = RayVirtualCluster( + name="test-flops-tracker", + bundle_ct_per_node_list=[num_gpus], + use_gpus=True, + num_gpus_per_node=num_gpus, + max_colocated_worker_groups=1, + ) + + # Use the default config function + config = create_megatron_test_config(tiny_llama_model_path) + tokenizer = get_tokenizer(config["tokenizer"]) + config["generation"] = configure_generation_config(config["generation"], tokenizer) + + policy = Policy( + cluster=cluster, + config=config, + tokenizer=tokenizer, + init_reference_model=False, + ) + + # Create test data + torch.manual_seed(42) + input_ids = torch.randint(0, vocab_size, (batch_size, seq_len)) + attention_mask = torch.ones(batch_size, seq_len) + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + "attention_mask": attention_mask, + "labels": torch.randint(0, vocab_size, (batch_size, seq_len)), + "sample_mask": torch.ones(batch_size), + } + ) + + # Create loss function + loss_fn = SimpleLoss() + + try: + # Prepare for training + policy.prepare_for_training() + + # Perform 2 warmup iterations + print("Performing warmup iterations...") + for warmup_step in range(2): + results = policy.train(data, loss_fn) + + # Measure FLOPS on the third iteration + print("Measuring FLOPS on third iteration...") + time_begin = time.time() + results = 
policy.train(data, loss_fn) + runtime_sec = time.time() - time_begin + + # Check if FLOPS tracking is available + if policy.flops_tracker is not None: + assert "total_flops" in results, ( + "Training results should contain 'total_flops'" + ) + total_flops = results["total_flops"] + + assert isinstance(total_flops, (int, float)), ( + "total_flops should be numeric" + ) + assert total_flops > 0, "total_flops should be positive" + + total_tflops = total_flops / 1e12 + print(f"Total FLOPS: {total_flops:.2e} ({total_tflops:.4f} TFLOPS)") + + flop_count_total = total_flops * runtime_sec + assert 1e9 < flop_count_total < 5e10, ( + "Total FLOPS should be within 1e9 and 5e10" + ) + + if "theoretical_tflops" in results: + theoretical_tflops = results["theoretical_tflops"] + assert isinstance(theoretical_tflops, (int, float)), ( + "theoretical_tflops should be numeric" + ) + assert theoretical_tflops > 0, "theoretical_tflops should be positive" + + utilization = total_tflops / theoretical_tflops + print(f"Theoretical TFLOPS: {theoretical_tflops:.2f}") + print(f"Model utilization: {utilization * 100:.2f}%") + + assert utilization <= 1.0, ( + f"Model utilization {utilization * 100:.2f}% should not exceed 100%" + ) + else: + print("FLOPS tracker not available, skipping FLOPS range check") + pytest.skip("FLOPS tracker not supported for this model configuration") + + finally: + policy.shutdown() + cluster.shutdown() diff --git a/tests/unit/models/policy/test_policy_validation.py b/tests/unit/models/policy/test_policy_validation.py new file mode 100644 index 0000000000..e5aedeeb12 --- /dev/null +++ b/tests/unit/models/policy/test_policy_validation.py @@ -0,0 +1,336 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Unit tests for Policy class validation logic. + +This module tests the early validation checks in the Policy class, particularly +the world_size compatibility validation that prevents confusing reshape errors +when the cluster size is insufficient for the specified parallelism configuration. +""" + +from unittest.mock import MagicMock, patch + +import pytest + +from nemo_rl.models.policy import PolicyConfig +from nemo_rl.models.policy.lm_policy import Policy + + +def create_mock_cluster(world_size: int): + """Create a mock cluster with the specified world size.""" + cluster = MagicMock() + cluster.world_size.return_value = world_size + + # Mock get_master_address_and_port method to return valid address and port + cluster.get_master_address_and_port.return_value = ("127.0.0.1", 29500) + + # Mock get_placement_groups method to return a list of mock placement groups + mock_pg = MagicMock() + mock_pg.bundle_count = world_size # Each placement group has world_size bundles + cluster.get_placement_groups.return_value = [mock_pg] + + # Mock get_available_address_and_port method + cluster.get_available_address_and_port.return_value = ("127.0.0.1", 29501) + + return cluster + + +def create_mock_tokenizer(): + """Create a mock tokenizer.""" + tokenizer = MagicMock() + tokenizer.pad_token_id = 0 + return tokenizer + + +def create_dtensor_config( + model_name: str, tp: int, pp: int = 1, cp: int = 1 +) -> PolicyConfig: + """Create a DTensor configuration for testing.""" + return { + "model_name": model_name, + "tokenizer": {"name": model_name}, + 
"generation_batch_size": 1, + "train_global_batch_size": 4, + "train_micro_batch_size": 1, + "learning_rate": 5e-6, + "logprob_batch_size": 1, + "precision": "float32", + "offload_optimizer_for_logprob": False, + "generation": { + "backend": "hf", + "temperature": 1.0, + "max_new_tokens": 16, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + }, + "dtensor_cfg": { + "enabled": True, + "cpu_offload": False, + "sequence_parallel": False, + "activation_checkpointing": False, + "tensor_parallel_size": tp, + "context_parallel_size": cp, + }, + "dynamic_batching": { + "enabled": True, + "train_mb_tokens": 128, + "logprob_mb_tokens": 128, + "sequence_length_round": 4, + }, + "sequence_packing": { + "enabled": False, + }, + "optimizer": { + "name": "torch.optim.AdamW", + "lr": 5e-6, + "weight_decay": 0.01, + "betas": [0.9, 0.999], + }, + } + + +def create_megatron_config( + model_name: str, tp: int, pp: int = 1, cp: int = 1 +) -> PolicyConfig: + """Create a Megatron configuration for testing.""" + return { + "model_name": model_name, + "tokenizer": {"name": model_name}, + "generation_batch_size": 1, + "train_global_batch_size": 4, + "train_micro_batch_size": 1, + "learning_rate": 5e-6, + "logprob_batch_size": 1, + "precision": "float32", + "offload_optimizer_for_logprob": False, + "generation": { + "backend": "hf", + "temperature": 1.0, + "max_new_tokens": 16, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + }, + "megatron_cfg": { + "enabled": True, + "tensor_model_parallel_size": tp, + "pipeline_model_parallel_size": pp, + "context_parallel_size": cp, + }, + "dynamic_batching": { + "enabled": pp == 1, # Only enable for single pipeline parallel stage + "train_mb_tokens": 128, + 
"logprob_mb_tokens": 128, + "sequence_length_round": 4, + }, + "sequence_packing": { + "enabled": False, + }, + "optimizer": { + "name": "torch.optim.AdamW", + "lr": 5e-6, + "weight_decay": 0.01, + "betas": [0.9, 0.999], + }, + } + + +@pytest.mark.parametrize( + "world_size,tp,cp,should_pass,expected_error_type,description", + [ + # Valid cases - DTensor backend (PP is always 1 for DTensor) + (8, 8, 1, True, None, "Valid: DP=1, TP=8, PP=1, CP=1"), + (16, 8, 1, True, None, "Valid: DP=2, TP=8, PP=1, CP=1"), + (8, 4, 2, True, None, "Valid: DP=1, TP=4, PP=1, CP=2"), + (16, 4, 2, True, None, "Valid: DP=2, TP=4, PP=1, CP=2"), + (1, 1, 1, True, None, "Valid: Minimal config DP=1, TP=1, PP=1, CP=1"), + # Invalid cases - insufficient world_size (DP < 1) + (4, 8, 1, False, "insufficient", "Invalid: DP=0.5, TP=8, PP=1, CP=1"), + (2, 8, 1, False, "insufficient", "Invalid: DP=0.25, TP=8, PP=1, CP=1"), + (4, 4, 2, False, "insufficient", "Invalid: DP=0.5, TP=4, PP=1, CP=2"), + # Invalid cases - not divisible (DP not integer) + (10, 4, 2, False, "divisible", "Invalid: DP=1.25, TP=4, PP=1, CP=2"), + (9, 8, 1, False, "divisible", "Invalid: DP=1.125, TP=8, PP=1, CP=1"), + (6, 4, 1, False, "divisible", "Invalid: DP=1.5, TP=4, PP=1, CP=1"), + ], +) +@patch("nemo_rl.models.policy.lm_policy.RayWorkerGroup") +def test_world_size_validation_dtensor( + mock_ray_worker_group, + tiny_llama_model_path, + world_size, + tp, + cp, + should_pass, + expected_error_type, + description, +): + """Test world_size validation with DTensor backend. + + Note: DTensor backend always uses PP=1 (no pipeline parallelism support). + Tests the constraint: world_size = DP * PP * CP * TP where DP >= 1 and DP must be integer. 
+ """ + cluster = create_mock_cluster(world_size) + tokenizer = create_mock_tokenizer() + config = create_dtensor_config( + tiny_llama_model_path, tp, pp=1, cp=cp + ) # DTensor always has PP=1 + + # Mock RayWorkerGroup to prevent actual worker creation + mock_worker_group_instance = MagicMock() + mock_ray_worker_group.return_value = mock_worker_group_instance + + if should_pass: + # Should succeed without raising an exception + try: + policy = Policy(cluster=cluster, config=config, tokenizer=tokenizer) + # Verify the calculated DP makes sense + expected_dp = world_size // (1 * cp * tp) # PP=1 for DTensor + assert expected_dp >= 1, f"Expected DP should be >= 1, got {expected_dp}" + # Verify that worker group was created (validation passed) + mock_ray_worker_group.assert_called_once() + except Exception as e: + pytest.fail(f"Expected success for {description}, but got error: {e}") + else: + # Should raise ValueError with specific error type + with pytest.raises(ValueError) as exc_info: + Policy(cluster=cluster, config=config, tokenizer=tokenizer) + + error_msg = str(exc_info.value) + if expected_error_type == "insufficient": + assert "insufficient" in error_msg, ( + f"Expected 'insufficient' error for {description}" + ) + assert "DP must be ≥ 1" in error_msg, ( + f"Expected DP constraint message for {description}" + ) + elif expected_error_type == "divisible": + assert "must be divisible" in error_msg, ( + f"Expected 'divisible' error for {description}" + ) + assert "not an integer" in error_msg, ( + f"Expected integer constraint message for {description}" + ) + # For failing cases, worker group should not be created + mock_ray_worker_group.assert_not_called() + + +@pytest.mark.parametrize( + "world_size,tp,pp,cp,should_pass,expected_error_type,description", + [ + # Valid cases - Megatron backend (supports PP > 1) + ( + 32, + 8, + 4, + 1, + True, + None, + "Valid: DP=1, TP=8, PP=4, CP=1 (original error case fixed)", + ), + (64, 8, 4, 1, True, None, "Valid: DP=2, 
TP=8, PP=4, CP=1"), + (16, 4, 2, 2, True, None, "Valid: DP=1, TP=4, PP=2, CP=2"), + # Invalid cases - insufficient world_size (DP < 1) + ( + 8, + 8, + 4, + 1, + False, + "insufficient", + "Invalid: DP=0.25, TP=8, PP=4, CP=1 (original error)", + ), + (16, 8, 4, 1, False, "insufficient", "Invalid: DP=0.5, TP=8, PP=4, CP=1"), + # Invalid cases - not divisible (DP not integer) + (33, 8, 4, 1, False, "divisible", "Invalid: DP=1.03, TP=8, PP=4, CP=1"), + (18, 4, 2, 2, False, "divisible", "Invalid: DP=1.125, TP=4, PP=2, CP=2"), + ], +) +@patch("nemo_rl.models.policy.lm_policy.RayWorkerGroup") +def test_world_size_validation_megatron( + mock_ray_worker_group, + tiny_llama_model_path, + world_size, + tp, + pp, + cp, + should_pass, + expected_error_type, + description, +): + """Test world_size validation with Megatron backend. + + Megatron backend supports pipeline parallelism (PP > 1) unlike DTensor. + Tests the constraint: world_size = DP * PP * CP * TP where DP >= 1 and DP must be integer. + Note: Expert Parallelism (EP) is handled internally by Megatron-Core, not at the worker level. 
+ """ + cluster = create_mock_cluster(world_size) + tokenizer = create_mock_tokenizer() + config = create_megatron_config(tiny_llama_model_path, tp, pp, cp) + + # Mock RayWorkerGroup to prevent actual worker creation + mock_worker_group_instance = MagicMock() + mock_ray_worker_group.return_value = mock_worker_group_instance + + if should_pass: + # Should succeed without raising an exception + try: + policy = Policy(cluster=cluster, config=config, tokenizer=tokenizer) + # Verify the calculated DP makes sense + expected_dp = world_size // (pp * cp * tp) + assert expected_dp >= 1, f"Expected DP should be >= 1, got {expected_dp}" + # Verify that worker group was created (validation passed) + mock_ray_worker_group.assert_called_once() + except Exception as e: + pytest.fail(f"Expected success for {description}, but got error: {e}") + else: + # Should raise ValueError with specific error type + with pytest.raises(ValueError) as exc_info: + Policy(cluster=cluster, config=config, tokenizer=tokenizer) + + error_msg = str(exc_info.value) + if expected_error_type == "insufficient": + assert "insufficient" in error_msg, ( + f"Expected 'insufficient' error for {description}" + ) + assert "DP must be ≥ 1" in error_msg, ( + f"Expected DP constraint message for {description}" + ) + elif expected_error_type == "divisible": + assert "must be divisible" in error_msg, ( + f"Expected 'divisible' error for {description}" + ) + assert "not an integer" in error_msg, ( + f"Expected integer constraint message for {description}" + ) + # For failing cases, worker group should not be created + mock_ray_worker_group.assert_not_called() diff --git a/tests/unit/models/policy/test_utils.py b/tests/unit/models/policy/test_utils.py index 5712985cd3..0b90ab0fbf 100644 --- a/tests/unit/models/policy/test_utils.py +++ b/tests/unit/models/policy/test_utils.py @@ -12,153 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import multiprocessing import os +import sys +import time +import traceback import unittest.mock -from unittest.mock import MagicMock, patch + +import pytest +import torch +import zmq from nemo_rl.models.policy.utils import ( - configure_expandable_segments, + IPCProtocol, + calculate_aligned_size, get_megatron_checkpoint_dir, + rebuild_cuda_tensor_from_ipc, + stream_weights_via_ipc_zmq_impl, ) -class TestConfigureExpandableSegments(unittest.TestCase): - """Test cases for configure_expandable_segments function.""" - - def setUp(self): - """Set up test environment.""" - # Store original environment variable - self.original_pytorch_cuda_alloc_conf = os.environ.get( - "PYTORCH_CUDA_ALLOC_CONF" - ) - - def tearDown(self): - """Clean up after tests.""" - # Restore original environment variable - if self.original_pytorch_cuda_alloc_conf is not None: - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = ( - self.original_pytorch_cuda_alloc_conf - ) - elif "PYTORCH_CUDA_ALLOC_CONF" in os.environ: - del os.environ["PYTORCH_CUDA_ALLOC_CONF"] - - @patch("torch.cuda.get_device_properties") - def test_hopper_gpu_no_existing_config(self, mock_get_device_properties): - """Test Hopper+ GPU (compute capability >= 9) with no existing PYTORCH_CUDA_ALLOC_CONF.""" - # Mock GPU properties for Hopper+ architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 9 - mock_get_device_properties.return_value = mock_device_properties - - # Ensure no existing config - if "PYTORCH_CUDA_ALLOC_CONF" in os.environ: - del os.environ["PYTORCH_CUDA_ALLOC_CONF"] - - # Call the function - configure_expandable_segments() - - # Verify the environment variable was set correctly - self.assertEqual( - os.environ["PYTORCH_CUDA_ALLOC_CONF"], "expandable_segments:True" - ) - - @patch("torch.cuda.get_device_properties") - def test_hopper_gpu_with_existing_config(self, mock_get_device_properties): - """Test Hopper+ GPU with existing PYTORCH_CUDA_ALLOC_CONF.""" - # Mock GPU properties for Hopper+ 
architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 9 - mock_get_device_properties.return_value = mock_device_properties - - # Set existing config - existing_config = "max_split_size_mb:128" - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config - - # Call the function - configure_expandable_segments() - - # Verify the environment variable was updated correctly - expected_config = f"{existing_config},expandable_segments:True" - self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], expected_config) - - @patch("torch.cuda.get_device_properties") - def test_hopper_gpu_already_configured(self, mock_get_device_properties): - """Test Hopper+ GPU with existing config that already has expandable_segments.""" - # Mock GPU properties for Hopper+ architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 9 - mock_get_device_properties.return_value = mock_device_properties - - # Set existing config with expandable_segments already present - existing_config = "max_split_size_mb:128,expandable_segments:False" - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config - - # Call the function - configure_expandable_segments() - - # Verify the environment variable was not changed - self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], existing_config) - - @patch("torch.cuda.get_device_properties") - def test_ampere_gpu_no_config_change(self, mock_get_device_properties): - """Test Ampere GPU (compute capability < 9) should not modify config.""" - # Mock GPU properties for Ampere architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 8 # Ampere - mock_get_device_properties.return_value = mock_device_properties - - # Set existing config - existing_config = "max_split_size_mb:128" - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = existing_config - - # Call the function - configure_expandable_segments() - - # Verify the environment variable was not changed - 
self.assertEqual(os.environ["PYTORCH_CUDA_ALLOC_CONF"], existing_config) - - @patch("torch.cuda.get_device_properties") - def test_ampere_gpu_no_existing_config(self, mock_get_device_properties): - """Test Ampere GPU with no existing config should not set anything.""" - # Mock GPU properties for Ampere architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 8 # Ampere - mock_get_device_properties.return_value = mock_device_properties - - # Ensure no existing config - if "PYTORCH_CUDA_ALLOC_CONF" in os.environ: - del os.environ["PYTORCH_CUDA_ALLOC_CONF"] - - # Call the function - configure_expandable_segments() - - # Verify the environment variable was not set - self.assertNotIn("PYTORCH_CUDA_ALLOC_CONF", os.environ) - - @patch("torch.cuda.get_device_properties") - def test_ampere_gpu_with_expandable_segments_true_raises_error( - self, mock_get_device_properties - ): - """Test Ampere GPU with expandable_segments:True in config raises RuntimeError.""" - # Mock GPU properties for Ampere architecture - mock_device_properties = MagicMock() - mock_device_properties.major = 8 # Ampere - mock_get_device_properties.return_value = mock_device_properties - - # Set config with expandable_segments:True - os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" - - # Call the function and expect RuntimeError - with self.assertRaises(RuntimeError) as context: - configure_expandable_segments() - - # Verify the error message - self.assertIn("expandable_segments is enabled", str(context.exception)) - self.assertIn( - "not supported on architectures older than Hopper", str(context.exception) - ) - - class TestGetMegatronCheckpointDir: """Test cases for the get_megatron_checkpoint_dir function.""" @@ -245,3 +118,228 @@ def test_function_prints_selected_directory(self, capsys): f"Using default megatron checkpoint dir: {expected_dir}" in captured.out ) assert result == expected_dir + + +def server_process( + zmq_addr: str, + known_tensors: 
list[tuple[str, torch.Tensor]], + buffer_size_bytes: int, + ready_queue: multiprocessing.Queue, +) -> None: + """Server process that streams tensors via IPC ZMQ.""" + try: + device = torch.device("cuda:0") + gpu_tensors = [(name, tensor.to(device)) for name, tensor in known_tensors] + + context = zmq.Context() + socket = context.socket(zmq.PAIR) + socket.setsockopt(zmq.LINGER, 0) # Close immediately on error + socket.setsockopt(zmq.RCVTIMEO, 10000) # 10 second timeout + socket.bind(zmq_addr) + ready_queue.put(("ready", None)) + + stream_weights_via_ipc_zmq_impl( + (t for t in gpu_tensors), + buffer_size_bytes, + socket, + rank=0, + worker_name="test_server", + ) + except Exception as e: + import sys + import traceback + + error_details = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}" + ready_queue.put(("error", error_details)) + sys.exit( + 1 + ) # Exit with non-zero code so check_process_error detects the failure + finally: + socket.close() + context.term() + + +def client_process( + zmq_addr: str, + known_tensors_data: list[tuple[str, tuple, torch.dtype, torch.Tensor]], + result_queue: multiprocessing.Queue, +) -> None: + """Client process that receives and validates tensors via IPC ZMQ.""" + try: + device = torch.device("cuda:0") + + # Prepare expected tensors on GPU + expected_tensors = { + name: tensor.to(device) for name, _, _, tensor in known_tensors_data + } + state_dict_info = { + name: (shape, dtype) for name, shape, dtype, _ in known_tensors_data + } + + context = zmq.Context() + socket = context.socket(zmq.PAIR) + socket.setsockopt(zmq.LINGER, 0) # Close immediately on error + socket.setsockopt(zmq.RCVTIMEO, 10000) # 10 second timeout + socket.connect(zmq_addr) + + # Receive and validate loop + while True: + payload = socket.recv_pyobj() + if payload == IPCProtocol.COMPLETE: + socket.send(IPCProtocol.ACK.value.encode()) + break + + ipc_handle, list_keys, used_bytes = payload + buffer = rebuild_cuda_tensor_from_ipc(ipc_handle, device.index) + 
+ offset = 0 + for key in list_keys: + shape, dtype = state_dict_info[key] + shape = torch.Size(shape) if isinstance(shape, list) else shape + size_in_bytes = dtype.itemsize * shape.numel() + + tensor = ( + buffer[offset : offset + size_in_bytes] + .view(dtype=dtype) + .view(shape) + ) + expected = expected_tensors[key] + + # Validate tensor + assert tensor.shape == expected.shape, f"Shape mismatch for {key}" + assert tensor.dtype == expected.dtype, f"Dtype mismatch for {key}" + assert torch.allclose(tensor, expected, rtol=1e-7, atol=1e-7), ( + f"Values mismatch for {key}" + ) + + offset += calculate_aligned_size(size_in_bytes) + + assert offset == used_bytes, f"Offset mismatch: {offset} != {used_bytes}" + socket.send(b"") + + result_queue.put(("success", "All tensors validated")) + except Exception as e: + error_details = f"{type(e).__name__}: {str(e)}\n{traceback.format_exc()}" + result_queue.put(("error", error_details)) + sys.exit(1) + finally: + socket.close() + context.term() + + +def check_process_error( + proc: multiprocessing.Process, + queue: multiprocessing.Queue, + process_name: str, +) -> None: + """Check if a process failed and assert with detailed error message if available.""" + if proc.exitcode == 0: + return + + # Get error details from queue + error_msg = None + while not queue.empty(): + status, msg = queue.get_nowait() + if status == "error": + error_msg = msg + break + + if proc.exitcode is None: + assert False, f"{process_name} timed out" + else: + details = f"\n{error_msg}" if error_msg else "" + assert False, f"{process_name} failed (exitcode={proc.exitcode}){details}" + + +class TestStreamWeightsViaIPC: + """Test suite for IPC weight streaming functionality.""" + + TIMEOUT = 30 # 30 second timeout for additional overhead when running with coverage + + @pytest.mark.parametrize( + "test_case,tensor_specs,buffer_size_bytes,test_description", + [ + ( + "large_buffer", + [ + ("tensor_1", (10, 20), torch.float32), # 0.78KB + ("tensor_2", (5, 15, 
25), torch.float32), # 7.32KB + ("tensor_3", (100,), torch.float16), # 0.20KB + ("tensor_4", (50, 50), torch.bfloat16), # 4.88KB + ("tensor_5", (8, 16, 32), torch.float32), # 16.00KB + ], # Total: 29.18KB + 100 * 1024, # 100 KB - large buffer for single batch (50KB per side) + "Test with various shapes/dtypes in large buffer (single batch)", + ), + ( + "small_buffer", + [ + ("small_1", (30, 30), torch.float32), # 3.52KB + ("small_2", (20, 40), torch.float16), # 1.56KB + ("small_3", (128,), torch.float32), # 0.50KB + ("small_4", (25, 35), torch.float32), # 3.42KB + ], # Total: 9.00KB + 10 * 1024, # 10 KB - forces multiple batches (5KB per side) + "Test with small buffer forcing multiple batches", + ), + ], + ) + def test_stream_weights_via_ipc_zmq_impl( + self, test_case, tensor_specs, buffer_size_bytes, test_description + ): + """Test streaming weights via IPC ZMQ between server and client processes.""" + # Generate test tensors + known_tensors = [ + (name, torch.randn(*shape, dtype=dtype)) + for name, shape, dtype in tensor_specs + ] + known_tensors_data = [ + (name, list(t.shape), t.dtype, t) for name, t in known_tensors + ] + + # Create unique socket path and queues + socket_path = f"/tmp/test_ipc_zmq_{test_case}_{os.getpid()}_{time.time()}" + zmq_addr = f"ipc://{socket_path}" + + mp_context = multiprocessing.get_context("spawn") + ready_queue = mp_context.Queue() + result_queue = mp_context.Queue() + + # Start server and client + server_proc = mp_context.Process( + target=server_process, + args=(zmq_addr, known_tensors, buffer_size_bytes, ready_queue), + ) + server_proc.start() + + status, msg = ready_queue.get(timeout=self.TIMEOUT) + assert status == "ready", f"Server failed: {msg}" + + client_proc = mp_context.Process( + target=client_process, + args=(zmq_addr, known_tensors_data, result_queue), + ) + client_proc.start() + + # Wait and validate + try: + server_proc.join(timeout=self.TIMEOUT) + client_proc.join(timeout=self.TIMEOUT) + + # Check client first 
since client failure often causes server to fail + check_process_error(client_proc, result_queue, "Client") + check_process_error(server_proc, ready_queue, "Server") + + # Verify client success message + status, msg = result_queue.get(timeout=self.TIMEOUT) + assert status == "success", f"Validation failed: {msg}" + finally: + for proc in [server_proc, client_proc]: + if proc and proc.is_alive(): + proc.terminate() + proc.join(timeout=self.TIMEOUT) + if proc.is_alive(): + proc.kill() + + if os.path.exists(socket_path): + os.unlink(socket_path) diff --git a/tests/unit/prepare_unit_test_assets.py b/tests/unit/prepare_unit_test_assets.py new file mode 100644 index 0000000000..6cb8344c55 --- /dev/null +++ b/tests/unit/prepare_unit_test_assets.py @@ -0,0 +1,98 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This script exists to help load any unit asset that requires special handling. + +The initial reason for this was to help with Nemotron-H which has a requirement +to have mamaba-ssm in the base environment in order to initialize a dummy model. Since +the unit tests should be runable with the base environment (without mamba-ssm), +we use ray.remotes to build the asset here. We do this outside of a fixture +like the other test assets because this one sometimes takes a while to build. This +extra setup time can sometimes cause timeouts in the unit tests if unlucky. 
+""" + +import os + +import ray + +from nemo_rl.distributed.virtual_cluster import PY_EXECUTABLES +from nemo_rl.utils.venvs import create_local_venv + +TESTS_DIR = os.path.dirname(os.path.abspath(__file__)) +TEST_ASSETS_DIR = os.path.join(TESTS_DIR, "test_assets") + + +def build_tiny_nemotron5_h_checkpoint(model_path: str) -> None: + import shutil + + from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer + + config = AutoConfig.from_pretrained( + "nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True + ) + config.hybrid_override_pattern = "M*-" + config.num_hidden_layers = 3 + config.intermediate_size = 32 + config.hidden_size = 256 + config.num_attention_heads = 8 + config.mamba_num_heads = 8 + config.num_key_value_heads = 8 + config.n_groups = 1 + + model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained( + "nvidia/Nemotron-H-8B-Base-8K", trust_remote_code=True + ) + + shutil.rmtree(model_path, ignore_errors=True) + model.save_pretrained(model_path) + tokenizer.save_pretrained(model_path) + print(f"✓ Built tiny Nemotron-H asset at: {model_path}") + + +def main() -> None: + os.makedirs(TEST_ASSETS_DIR, exist_ok=True) + + target = os.path.join(TEST_ASSETS_DIR, "tiny_nemotron5_h_with_nemotron_tokenizer") + + # Create Automodel env venv + automodel_python = create_local_venv( + py_executable=PY_EXECUTABLES.AUTOMODEL, venv_name="automodel_env" + ) + + ############################################################################ + # Add other remote calls here + ############################################################################ + # Submit as list of remote calls and wait individually + remote_calls = [ + ray.remote(build_tiny_nemotron5_h_checkpoint) + .options( + num_gpus=0.01, # tiny reservation to satisfy CUDA-inspecting deps + runtime_env={"py_executable": automodel_python}, + name="build-nemotron5h", + ) + .remote(target) + ] + + for obj_ref in remote_calls: + ray.get(obj_ref) 
+ + +if __name__ == "__main__": + if not ray.is_initialized(): + ray.init(ignore_reinit_error=True, include_dashboard=False) + try: + main() + finally: + ray.shutdown() diff --git a/tests/unit/rewards/test_rewards.py b/tests/unit/rewards/test_rewards.py new file mode 100644 index 0000000000..38ea52d74e --- /dev/null +++ b/tests/unit/rewards/test_rewards.py @@ -0,0 +1,259 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from numpy.testing import assert_allclose + +from nemo_rl.environments.rewards import ( + bbox_giou_reward, + combine_reward_functions, + exact_answer_alphanumeric_reward, + format_reward, + math_expression_reward, +) + + +def test_math_expression_reward(): + # Test correct math expression + ground_truth = "2x + 3" + response = "Let me solve for y ... <think>5x + 5 = 3x + 2 + y \implies y = 2x + 3</think> <answer>2x + 3</answer>" + reward, is_correct = math_expression_reward(ground_truth, response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test incorrect math expression + response = "Let me solve this... <think>I'm a dumb LLM so I have no reasoning trace to actuallysolve this</think> <answer>3x + 2</answer>" + reward, is_correct = math_expression_reward(ground_truth, response) + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + # Test for missing answer tags + response = "Let me solve this... 
The answer is 2x + 3" + reward, is_correct = math_expression_reward(ground_truth, response) + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + +def test_format_reward(): + ground_truth = "any_ground_truth" # Format reward doesn't use ground truth + + # Test complete format + response = "<think>My thinking</think> <answer>My answer</answer>" + reward, is_correct = format_reward(ground_truth, response) + assert reward == 1.0 + assert is_correct is None + + # Test only think tags + response = "<think>My thinking</think>" + reward, is_correct = format_reward(ground_truth, response) + assert reward == 0.25 + assert is_correct is None + + # Test only answer tags + response = "<answer>My answer</answer>" + reward, is_correct = format_reward(ground_truth, response) + assert reward == 0.75 + assert is_correct is None + + # Test no tags + response = "Just plain text" + reward, is_correct = format_reward(ground_truth, response) + assert reward == 0.0 + assert is_correct is None + + +def test_format_reward_custom_tags(): + ground_truth = "does_not_matter" + + # Both tags in response and reward function match + response = "<think_trace>Reasoning here</think_trace> <solution>42</solution>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 1.0 + assert is_correct is None + + # Only think tag present, tags match + response = "<think_trace>Reasoning here</think_trace>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.25 + assert is_correct is None + + # Only answer tag present, tags match + response = "<solution>42</solution>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.75 + assert is_correct is None + + # Neither tag present, tags match + response = "No tags here" + reward, is_correct = 
format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.0 + assert is_correct is None + + # Tags in response do not match those in reward function (should yield 0.0) + response = "<think>Reasoning here</think> <answer>42</answer>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.0 + assert is_correct is None + + # Mixed: one tag matches, one does not (should yield 0.25 for think_trace, 0 for solution) + response = "<think_trace>Reasoning here</think_trace> <answer>42</answer>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.25 + assert is_correct is None + + # Mixed: one tag matches, one does not (should yield 0.75 for solution, 0 for think_trace) + response = "<think>Reasoning here</think> <solution>42</solution>" + reward, is_correct = format_reward( + ground_truth, response, think_tag="think_trace", answer_tag="solution" + ) + assert reward == 0.75 + assert is_correct is None + + +def test_exact_answer_alphanumeric_reward(): + ground_truth = "Hello123" + + # Test exact match + response = "<answer>Hello123</answer>" + reward, is_correct = exact_answer_alphanumeric_reward(ground_truth, response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test case insensitive match + response = "<answer>HELLO123</answer>" + reward, is_correct = exact_answer_alphanumeric_reward(ground_truth, response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test with special characters + response = "<answer>Hello-123!</answer>" + reward, is_correct = exact_answer_alphanumeric_reward(ground_truth, response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test incorrect answer + response = "<answer>Hello124</answer>" + reward, is_correct = 
exact_answer_alphanumeric_reward(ground_truth, response) + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + +def test_bbox_giou_reward(): + ground_truth = "[0.1, 0.1, 0.5, 0.5]" + + # Test perfect match + response = "<answer>[0.1, 0.1, 0.5, 0.5]</answer>" + reward, is_correct = bbox_giou_reward(ground_truth, response) + print(f"reward: {reward}, is_correct: {is_correct}") + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test partial overlap + response = "<answer>[0.2, 0.2, 0.6, 0.6]</answer>" + reward, is_correct = bbox_giou_reward(ground_truth, response) + print(f"reward: {reward}, is_correct: {is_correct}") + assert 0 < reward < 1.0 + assert is_correct is False + + # Test no overlap + response = "<answer>[0.6, 0.6, 0.9, 0.9]</answer>" + reward, is_correct = bbox_giou_reward(ground_truth, response) + print(f"reward: {reward}, is_correct: {is_correct}") + assert reward < 0.0 # GIoU can be negative when boxes don't overlap + assert is_correct is False + + # test bad bounding box format (5 numbers) + response = "<answer>[0.6, 0.6, 0.9, 0.9, 0.1]</answer>" + reward, is_correct = bbox_giou_reward(ground_truth, response) + print(f"reward: {reward}, is_correct: {is_correct}") + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + # Test invalid format + response = "<answer>invalid bbox format</answer>" + reward, is_correct = bbox_giou_reward(ground_truth, response) + print(f"reward: {reward}, is_correct: {is_correct}") + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + +def test_exact_answer_alphanumeric_reward_combined(): + # Define test cases + ground_truth = "test123" + good_response = "<think>thinking</think> <answer>test123</answer>" + bad_response = "<think>thinking</think> <answer>wrong</answer>" + incorrect_format_response = "here is a bbox: [0.1, 0.1, 0.5, 0.5] without any tags" + + # Create reward function combinations with weights + reward_functions = 
[(format_reward, 0.3), (exact_answer_alphanumeric_reward, 0.7)] + combined_reward = combine_reward_functions(reward_functions) + + # Test good response + reward, is_correct = combined_reward(ground_truth, good_response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test bad response + reward, is_correct = combined_reward(ground_truth, bad_response) + assert_allclose(reward, 0.3, atol=1e-6) + assert is_correct is False + + # test bad format + reward, is_correct = combined_reward(ground_truth, incorrect_format_response) + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False + + +def test_bbox_giou_reward_combined(): + # Test combining all reward functions + ground_truth_bbox = "[0.1, 0.1, 0.5, 0.5]" + good_response = "<think>The bounding box coordinates are [0.1, 0.1, 0.5, 0.5]</think> <answer>[0.1, 0.1, 0.5, 0.5]</answer>" + no_think_response = "<answer>[0.1, 0.1, 0.5, 0.5]</answer>" + no_answer_response = "<think>thinking</think>" + no_think_no_answer_response = ( + "here is a bbox: [0.1, 0.1, 0.5, 0.5] without any tags" + ) + + reward_functions = [(format_reward, 0.2), (bbox_giou_reward, 0.8)] + + combined_reward = combine_reward_functions(reward_functions) + + # Test perfect response + reward, is_correct = combined_reward(ground_truth_bbox, good_response) + assert_allclose(reward, 1.0, atol=1e-6) + assert is_correct is True + + # Test partially correct response (correct format, wrong bbox) + reward, is_correct = combined_reward(ground_truth_bbox, no_think_response) + assert_allclose(reward, 0.75 * 0.2 + 0.8, atol=1e-6) + assert is_correct is True + + reward, is_correct = combined_reward(ground_truth_bbox, no_answer_response) + assert_allclose(reward, 0.2 * 0.25, atol=1e-6) + assert is_correct is False + + reward, is_correct = combined_reward(ground_truth_bbox, no_think_no_answer_response) + assert_allclose(reward, 0.0, atol=1e-6) + assert is_correct is False diff --git a/tests/unit/test_check_metrics.py 
b/tests/unit/test_check_metrics.py new file mode 100644 index 0000000000..313801531e --- /dev/null +++ b/tests/unit/test_check_metrics.py @@ -0,0 +1,407 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from pathlib import Path + +import pytest + +# Add the tests directory to the path so we can import check_metrics +tests_dir = Path(__file__).parent.parent +sys.path.insert(0, str(tests_dir)) + +from check_metrics import evaluate_check, max, mean, min, ratio_above + + +class TestMeanFunction: + """Test the mean function with various scenarios.""" + + def test_basic_mean(self): + """Test basic mean calculation without outlier filtering.""" + data = {"1": 1.0, "2": 2.0, "3": 3.0, "4": 4.0, "5": 5.0} + result = mean(data) + assert result == 3.0 + + def test_mean_with_ignore_top_p(self): + """Test mean with ignore_top_p to filter outliers.""" + # Data with one clear outlier (100) + data = {"1": 1.0, "2": 2.0, "3": 3.0, "4": 4.0, "5": 100.0} + + # Without filtering + result_no_filter = mean(data, ignore_top_p=0.0) + assert result_no_filter == 22.0 # (1+2+3+4+100)/5 + + # With 20% filtering (should remove the top value: 100) + result_with_filter = mean(data, ignore_top_p=0.2) + assert result_with_filter == 2.5 # (1+2+3+4)/4 + + def test_mean_ignore_top_5_percent(self): + """Test mean with 5% outlier filtering.""" + # Create data with 20 values where the top one is an outlier + data = {str(i): 
float(i) for i in range(1, 20)} # 1-19 + data["20"] = 1000.0 # outlier + + # With 5% filtering (should remove 1 value out of 20 = top 5%) + result = mean(data, ignore_top_p=0.05) + # Should be mean of 1-19 = 10.0 + assert result == 10.0 + + def test_mean_ignore_multiple_outliers(self): + """Test mean with filtering multiple outliers.""" + data = {str(i): float(i) for i in range(1, 11)} # 1-10 + + # With 20% filtering (should remove top 2 values: 9, 10) + result = mean(data, ignore_top_p=0.2) + # Mean of 1-8 = 4.5 + assert result == 4.5 + + def test_mean_with_range_and_ignore_top_p(self): + """Test that range_start and range_end work with ignore_top_p.""" + data = {str(i): float(i) for i in range(1, 11)} + + # Get mean of steps 3-7 (values 3,4,5,6) with 25% filtering + # Should remove the top value (6), leaving 3,4,5 + result = mean(data, range_start=3, range_end=7, ignore_top_p=0.25) + assert result == 4.0 # (3+4+5)/3 + + def test_mean_ignore_top_p_edge_case_all_same(self): + """Test with all same values (no outliers).""" + data = {str(i): 5.0 for i in range(1, 11)} + result = mean(data, ignore_top_p=0.1) + assert result == 5.0 + + def test_mean_ignore_top_p_edge_case_single_value(self): + """Test with single value.""" + data = {"1": 42.0} + result = mean(data, ignore_top_p=0.5) + # Should keep at least one value + assert result == 42.0 + + def test_mean_ignore_top_p_edge_case_two_values(self): + """Test with two values.""" + data = {"1": 1.0, "2": 10.0} + result = mean(data, ignore_top_p=0.5) + # Should remove top 50% (1 value), leaving just 1.0 + assert result == 1.0 + + def test_mean_ignore_top_p_invalid_range(self): + """Test that invalid ignore_top_p values raise an error.""" + data = {"1": 1.0, "2": 2.0, "3": 3.0} + + with pytest.raises( + ValueError, match="ignore_top_p must be between 0.0 and 1.0" + ): + mean(data, ignore_top_p=1.5) + + with pytest.raises( + ValueError, match="ignore_top_p must be between 0.0 and 1.0" + ): + mean(data, ignore_top_p=-0.1) + 
+ def test_mean_with_offset(self): + """Test mean calculation with step offset (from checkpoint resume).""" + # Simulate a checkpoint resume scenario + # Steps 101-105 (resumed from step 100) + data = {"101": 1.0, "102": 2.0, "103": 3.0, "104": 4.0, "105": 5.0} + result = mean(data) + assert result == 3.0 + + def test_mean_with_negative_range(self): + """Test mean with negative range indices.""" + data = {str(i): float(i) for i in range(1, 11)} # 1-10 + + # Last 3 values (8, 9, 10) + result = mean(data, range_start=-3, range_end=0) + assert result == 9.0 # (8+9+10)/3 + + def test_mean_with_floats_and_strings(self): + """Test that string values are properly converted to floats.""" + data = {"1": "1.5", "2": "2.5", "3": "3.5"} + result = mean(data) + assert result == 2.5 + + +class TestMinMaxFunctions: + """Test the min and max helper functions.""" + + def test_min_basic(self): + """Test basic min functionality.""" + data = {"1": 5.0, "2": 2.0, "3": 8.0, "4": 1.0} + result = min(data) + assert result == 1.0 + + def test_max_basic(self): + """Test basic max functionality.""" + data = {"1": 5.0, "2": 2.0, "3": 8.0, "4": 1.0} + result = max(data) + assert result == 8.0 + + def test_min_with_string_values(self): + """Test min with string numeric values.""" + data = {"1": "5.5", "2": "2.2", "3": "8.8"} + result = min(data) + assert result == 2.2 + + def test_max_with_string_values(self): + """Test max with string numeric values.""" + data = {"1": "5.5", "2": "2.2", "3": "8.8"} + result = max(data) + assert result == 8.8 + + +class TestRatioAboveFunction: + """Test the ratio_above function.""" + + def test_ratio_above_basic(self): + """Test basic ratio_above calculation.""" + data = {"1": 1.0, "2": 2.0, "3": 3.0, "4": 4.0, "5": 5.0} + # Values >= 3.0 are: 3.0, 4.0, 5.0 (3 out of 5 = 0.6) + result = ratio_above(data, 3.0) + assert result == 0.6 + + def test_ratio_above_none_above(self): + """Test when no values are above threshold.""" + data = {"1": 1.0, "2": 2.0, "3": 3.0} 
+ result = ratio_above(data, 10.0) + assert result == 0.0 + + def test_ratio_above_all_above(self): + """Test when all values are above threshold.""" + data = {"1": 5.0, "2": 6.0, "3": 7.0} + result = ratio_above(data, 4.0) + assert result == 1.0 + + def test_ratio_above_equal_to_threshold(self): + """Test that values equal to threshold are counted (>=).""" + data = {"1": 1.0, "2": 2.0, "3": 2.0, "4": 3.0} + # Values >= 2.0 are: 2.0, 2.0, 3.0 (3 out of 4 = 0.75) + result = ratio_above(data, 2.0) + assert result == 0.75 + + def test_ratio_above_single_value(self): + """Test with single value.""" + data = {"1": 5.0} + result = ratio_above(data, 3.0) + assert result == 1.0 + + result = ratio_above(data, 10.0) + assert result == 0.0 + + def test_ratio_above_empty_dict(self): + """Test with empty dictionary.""" + data = {} + result = ratio_above(data, 1.0) + assert result == 0.0 + + def test_ratio_above_with_strings(self): + """Test that string values are properly converted.""" + data = {"1": "1.0", "2": "2.0", "3": "3.0", "4": "4.0", "5": "5.0"} + result = ratio_above(data, 3.0) + assert result == 0.6 + + def test_ratio_above_with_floats(self): + """Test with float threshold and values.""" + data = {"1": 1.05, "2": 1.1, "3": 1.0, "4": 1.2, "5": 0.9} + # Values >= 1.05: 1.05, 1.1, 1.2 (3 out of 5 = 0.6) + result = ratio_above(data, 1.05) + assert result == 0.6 + + +class TestEvaluateCheck: + """Test the evaluate_check function.""" + + def test_evaluate_check_pass(self): + """Test a passing check.""" + data = {"accuracy": {"1": 0.9, "2": 0.95}} + passed, message, value = evaluate_check(data, "mean(data['accuracy']) > 0.85") + assert passed is True + assert "PASS" in message + assert value == 0.925 + + def test_evaluate_check_fail(self): + """Test a failing check.""" + data = {"accuracy": {"1": 0.7, "2": 0.75}} + passed, message, value = evaluate_check(data, "mean(data['accuracy']) > 0.85") + assert passed is False + assert "FAIL" in message + assert value == 0.725 + + 
def test_evaluate_check_with_ignore_top_p(self): + """Test evaluate_check with ignore_top_p parameter.""" + data = {"error": {"1": 1.0, "2": 1.0, "3": 1.0, "4": 1.0, "5": 10.0}} + + # Without filtering, mean would be 2.8, which is > 1.5 (should fail the < check) + passed_no_filter, _, value_no_filter = evaluate_check( + data, "mean(data['error']) < 1.5" + ) + assert passed_no_filter is False + assert value_no_filter == 2.8 + + # With 20% filtering, mean should be 1.0, which is < 1.5 (should pass) + passed_with_filter, _, value_with_filter = evaluate_check( + data, "mean(data['error'], ignore_top_p=0.2) < 1.5" + ) + assert passed_with_filter is True + assert value_with_filter == 1.0 + + def test_evaluate_check_key_error(self): + """Test evaluate_check with missing key.""" + data = {"accuracy": {"1": 0.9}} + passed, message, value = evaluate_check(data, "mean(data['missing']) > 0.5") + assert passed is False + assert "key not found" in message + assert value is None + + def test_evaluate_check_multiple_conditions(self): + """Test evaluate_check with complex conditions.""" + data = { + "train_loss": {"1": 0.5, "2": 0.4, "3": 0.3}, + "val_loss": {"1": 0.6, "2": 0.5, "3": 0.4}, + } + + # Test less than + passed, _, value = evaluate_check(data, "mean(data['train_loss']) < 0.5") + assert passed is True + assert value == 0.4 + + # Test greater than + passed, _, value = evaluate_check(data, "mean(data['val_loss']) > 0.4") + assert passed is True + assert value == 0.5 + + def test_evaluate_check_with_min_max(self): + """Test evaluate_check with min and max functions.""" + data = {"scores": {"1": 1.0, "2": 5.0, "3": 3.0}} + + passed, _, value = evaluate_check(data, "min(data['scores']) > 0.5") + assert passed is True + assert value == 1.0 + + passed, _, value = evaluate_check(data, "max(data['scores']) < 10.0") + assert passed is True + assert value == 5.0 + + def test_evaluate_check_with_ratio_above(self): + """Test evaluate_check with ratio_above function.""" + data = 
{"error": {"1": 1.0, "2": 1.0, "3": 1.5, "4": 1.0, "5": 2.0}} + + # 2 out of 5 values are >= 1.5 (ratio = 0.4) + passed, _, value = evaluate_check(data, "ratio_above(data['error'], 1.5) < 0.5") + assert passed is True + assert value == 0.4 + + # Should fail when ratio is above threshold + passed, _, value = evaluate_check(data, "ratio_above(data['error'], 1.5) < 0.3") + assert passed is False + assert value == 0.4 + + +class TestRealWorldScenarios: + """Test scenarios that match real-world usage patterns.""" + + def test_token_prob_error_scenario(self): + """Test the exact scenario from the user's example.""" + # Simulate token_mult_prob_error with some outliers + data = { + "train/token_mult_prob_error": { + str(i): 1.0 + (i % 3) * 0.01 for i in range(1, 20) + } + } + # Add a couple large outliers that will skew the mean + data["train/token_mult_prob_error"]["20"] = 5.0 + + # Without filtering, mean should be significantly above 1.1 + passed_no_filter, _, value_no_filter = evaluate_check( + data, 'mean(data["train/token_mult_prob_error"]) < 1.1' + ) + assert passed_no_filter is False # Should fail due to outlier + assert value_no_filter > 1.1 + + # With 5% filtering (removes 1 out of 20 = top 5%) + passed_with_filter, _, value_with_filter = evaluate_check( + data, 'mean(data["train/token_mult_prob_error"], ignore_top_p=0.05) < 1.1' + ) + assert passed_with_filter is True # Should pass with outlier removed + assert value_with_filter < 1.1 + + def test_large_dataset_with_few_outliers(self): + """Test with a large dataset containing a few outliers.""" + # Create 100 normal values around 1.0 + data = {"metric": {str(i): 1.0 + (i % 10) * 0.01 for i in range(1, 101)}} + # Add 5 outliers + for i in range(101, 106): + data["metric"][str(i)] = 10.0 + + # Without filtering + mean_no_filter = mean(data["metric"], ignore_top_p=0.0) + assert mean_no_filter > 1.4 # Significantly affected by outliers + + # With 5% filtering (should remove ~5 values, including the outliers) + 
mean_with_filter = mean(data["metric"], ignore_top_p=0.05) + assert mean_with_filter < 1.1 # Should be close to 1.0 + + def test_robustness_to_varying_outlier_severity(self): + """Test that filtering works with outliers of varying severity.""" + base_data = {str(i): 1.0 for i in range(1, 10)} + + # Test with mild outlier + data_mild = base_data.copy() + data_mild["10"] = 2.0 + result_mild = mean(data_mild, ignore_top_p=0.1) + assert result_mild == 1.0 # Outlier removed + + # Test with severe outlier + data_severe = base_data.copy() + data_severe["10"] = 100.0 + result_severe = mean(data_severe, ignore_top_p=0.1) + assert result_severe == 1.0 # Outlier removed + + def test_ratio_above_real_world_scenario(self): + """Test the exact scenario from the user's example with ratio_above.""" + # Simulate token_mult_prob_error where most values are around 1.0 + # but a few are above 1.05 + data = { + "train/token_mult_prob_error": { + str(i): 1.0 + (i % 20) * 0.001 for i in range(1, 101) + } + } + # Add a few values above 1.05 (should be 1 out of 100 = 1%) + data["train/token_mult_prob_error"]["50"] = 1.06 + + # Check that less than 2% of values are above 1.05 + passed, _, value = evaluate_check( + data, 'ratio_above(data["train/token_mult_prob_error"], 1.05) < 0.02' + ) + assert passed is True + assert value == 0.01 # 1 out of 100 + + def test_ratio_above_combined_with_mean_ignore_top_p(self): + """Test combining ratio_above check with mean ignore_top_p.""" + # Create data where a few outliers would skew the mean + data = {"metric": {str(i): 1.0 for i in range(1, 96)}} + # Add 5 outliers (5%) + for i in range(96, 101): + data["metric"][str(i)] = 10.0 + + # Without filtering, mean would be high + mean_no_filter = mean(data["metric"], ignore_top_p=0.0) + assert mean_no_filter > 1.4 + + # With 5% filtering, mean should be close to 1.0 + mean_with_filter = mean(data["metric"], ignore_top_p=0.05) + assert mean_with_filter < 1.1 + + # Check that exactly 5% are above threshold + 
ratio = ratio_above(data["metric"], 5.0) + assert ratio == 0.05 diff --git a/tests/unit/test_config_validation.py b/tests/unit/test_config_validation.py index 3056ae270d..349a024ab0 100644 --- a/tests/unit/test_config_validation.py +++ b/tests/unit/test_config_validation.py @@ -14,212 +14,121 @@ import glob import os -import warnings from pathlib import Path -from typing import Any, Dict, List, Optional, Set, Type, Union, get_type_hints +from typing import Any, Dict, Type import pytest from omegaconf import OmegaConf -from typing_extensions import NotRequired - -from nemo_rl.algorithms.dpo import DPOConfig -from nemo_rl.algorithms.grpo import GRPOConfig, GRPOLoggerConfig -from nemo_rl.algorithms.sft import SFTConfig -from nemo_rl.data import DataConfig -from nemo_rl.distributed.virtual_cluster import ClusterConfig -from nemo_rl.models.policy import PolicyConfig -from nemo_rl.utils.checkpoint import CheckpointingConfig +from pydantic import TypeAdapter, ValidationError + +from nemo_rl.algorithms.distillation import MasterConfig as DistillationMasterConfig +from nemo_rl.algorithms.dpo import MasterConfig as DPOMasterConfig +from nemo_rl.algorithms.grpo import MasterConfig as GRPOMasterConfig +from nemo_rl.algorithms.rm import MasterConfig as RMMasterConfig +from nemo_rl.algorithms.sft import MasterConfig as SFTMasterConfig +from nemo_rl.evals.eval import MasterConfig as EvalMasterConfig from nemo_rl.utils.config import load_config_with_inheritance -from nemo_rl.utils.logger import LoggerConfig - - -def get_keys_from_typeddict(typed_dict_class: dict) -> Set[str]: - """Extract required keys from a TypedDict class, excluding NotRequired fields.""" - type_hints = get_type_hints(typed_dict_class, include_extras=True) - required_keys = set() - optional_keys = set() - - for key, annotation in type_hints.items(): - # Check if the field is marked as NotRequired - if hasattr(annotation, "__origin__") and (annotation.__origin__ is NotRequired): - optional_keys.add(key) - - ## 
check for Optional fields - elif ( - hasattr(annotation, "__origin__") - and annotation.__origin__ is Union - and type(None) in annotation.__args__ - ): - raise ValueError( - f"Please use the NotRequired annotation instead of Optional for key {key}" - ) - else: - required_keys.add(key) - - return required_keys, optional_keys +# All tests in this module should run first +pytestmark = pytest.mark.run_first -def validate_nested_config_section( - config_dict: Dict[str, Any], config_class: Type, section_path: str -) -> List[str]: - """Recursively validate a config section and its nested TypedDict fields.""" - errors = [] - type_hints = get_type_hints(config_class, include_extras=True) +if not OmegaConf.has_resolver("mul"): + OmegaConf.register_new_resolver("mul", lambda a, b: a * b) - for key, annotation in type_hints.items(): - current_path = f"{section_path}.{key}" if section_path else key - - # Check if the field is marked as NotRequired - is_optional = hasattr(annotation, "__origin__") and ( - annotation.__origin__ is NotRequired - ) - # If the key is not in the config and it's required, add an error - if key not in config_dict: - if not is_optional: - errors.append(f"Missing required key in {section_path}: {key}") - continue - - # Get the value from the config - value = config_dict[key] - - # If the annotation is a TypedDict (nested config), validate it recursively - if hasattr(annotation, "__annotations__") and isinstance(value, dict): - # This is a nested TypedDict, validate it recursively - nested_errors = validate_nested_config_section( - value, annotation, current_path +def validate_config_section( + section_config: Dict[str, Any], + config_class: Type, + config_file: str, +) -> None: + """Validate a config section against its TypedDict class using Pydantic. + + Raises AssertionError with formatted error messages if validation fails. 
+ """ + if not isinstance(section_config, dict): + raise TypeError("Config must be a dictionary") + + # Use Pydantic's TypeAdapter to validate the TypedDict + adapter = TypeAdapter(config_class) + try: + adapter.validate_python(section_config) + except ValidationError as e: + # Format errors nicely with actual values + error_messages = [] + for error in e.errors(): + path_parts = [] + if error["loc"]: + path_parts.extend(str(loc) for loc in error["loc"]) + path = ".".join(path_parts) if path_parts else "root" + + # Only include the actual input value for non-missing fields + # For missing fields, the 'input' is the parent dict which is confusing + input_info = "" + if "input" in error and error["type"] != "missing": + input_value = error.get("input") + # Truncate very long values for readability + input_str = str(input_value) + if len(input_str) > 100: + input_str = input_str[:97] + "..." + input_info = f" (got: {input_str})" + + error_messages.append( + f" {path}: {error['msg']} (type={error['type']}){input_info}" ) - errors.extend(nested_errors) - elif hasattr(annotation, "__origin__") and annotation.__origin__ is Optional: - # Handle Optional[TypedDict] case - if ( - value is not None - and hasattr(annotation.__args__[0], "__annotations__") - and isinstance(value, dict) - ): - nested_errors = validate_nested_config_section( - value, annotation.__args__[0], current_path - ) - errors.extend(nested_errors) - - # Check for extra keys (keys in config that are not in the TypedDict) - required_keys, optional_keys = get_keys_from_typeddict(config_class) - all_valid_keys = required_keys | optional_keys - - for key in config_dict.keys(): - if key not in all_valid_keys: - errors.append(f"Extra key in {section_path}: {key}") - - return errors - -def validate_config_section( - config_dict: Dict[str, Any], config_class: dict, section_name: str -) -> List[str]: - """Validate a specific section of a config against its TypedDict class.""" - errors = [] - required_keys, 
optional_keys = get_keys_from_typeddict(config_class) + config_info = f"\n\nConfig file: {config_file}" if config_file else "" + raise AssertionError( + f"Config validation failed:{config_info}\n" + "\n".join(error_messages) + ) from e - if section_name not in config_dict: - errors.append(f"Missing required section: {section_name}") - return errors - section_config = config_dict[section_name] - if not isinstance(section_config, dict): - errors.append(f"Section {section_name} must be a dictionary") - return errors +absolute_path = os.path.abspath(__file__) +configs_dir = Path( + os.path.join(os.path.dirname(absolute_path), "../../examples/configs") +).resolve() +config_files = glob.glob(str(configs_dir / "**/*.yaml"), recursive=True) +assert len(config_files) > 0, "No config files found" - # Use the new recursive validation function - nested_errors = validate_nested_config_section( - section_config, config_class, section_name - ) - errors.extend(nested_errors) - return errors +@pytest.mark.parametrize("config_file", config_files) +def test_all_config_files_have_required_keys(config_file): + """Test that all config files in examples/configs have all required keys for their respective sections.""" + print(f"\nValidating config file: {config_file}") + + # Load the config file with inheritance + config = load_config_with_inheritance(config_file) + config_dict = OmegaConf.to_container(config, resolve=True) + + if config_dict is None: + raise AssertionError(f"Config file {config_file} is empty or invalid") + + # Determine which MasterConfig to use based on the config contents + master_config_class = None + config_type = None + + if "/evals/" in config_file: + master_config_class = EvalMasterConfig + config_type = "eval" + elif "distillation" in config_dict: + master_config_class = DistillationMasterConfig + config_type = "distillation" + elif "dpo" in config_dict: + master_config_class = DPOMasterConfig + config_type = "dpo" + elif "sft" in config_dict: + 
master_config_class = SFTMasterConfig + config_type = "sft" + elif "grpo" in config_dict: + master_config_class = GRPOMasterConfig + config_type = "grpo" + elif "rm" in config_dict: + master_config_class = RMMasterConfig + config_type = "rm" + else: + raise AssertionError( + f"Could not determine algorithm type for config {config_file}." + ) -def test_all_config_files_have_required_keys(): - """Test that all config files in examples/configs have all required keys for their respective sections.""" - if not OmegaConf.has_resolver("mul"): - OmegaConf.register_new_resolver("mul", lambda a, b: a * b) - - absolute_path = os.path.abspath(__file__) - configs_dir = Path( - os.path.join(os.path.dirname(absolute_path), "../../examples/configs") - ) - - # Get all YAML config files - config_files = glob.glob(str(configs_dir / "**/*.yaml"), recursive=True) - - assert len(config_files) > 0, "No config files found" - - all_errors = [] - - for config_file in config_files: - print(f"\nValidating config file: {config_file}") - - try: - # Load the config file with inheritance - config = load_config_with_inheritance(config_file) - config_dict = OmegaConf.to_container(config, resolve=True) - - if config_dict is None: - all_errors.append(f"Config file {config_file} is empty or invalid") - continue - - # Validate each section against its corresponding config class - section_validations = [ - ("policy", PolicyConfig), - ("data", DataConfig), - ("cluster", ClusterConfig), - ("checkpointing", CheckpointingConfig), - ] - - # Add algorithm-specific validation - if "dpo" in config_dict: - section_validations.extend( - [("dpo", DPOConfig), ("logger", LoggerConfig)] - ) - elif "sft" in config_dict: - section_validations.extend( - [("sft", SFTConfig), ("logger", LoggerConfig)] - ) - elif "grpo" in config_dict: - section_validations.extend( - [("grpo", GRPOConfig), ("logger", GRPOLoggerConfig)] - ) - # GRPO also has a loss_fn section - if "loss_fn" in config_dict: - from 
nemo_rl.algorithms.loss_functions import ClippedPGLossConfig - - section_validations.append(("loss_fn", ClippedPGLossConfig)) - else: - warnings.warn( - f"Could not determine algorithm type for config {config_file}. Continuing..." - ) - continue - - # Validate each section - for section_name, config_class in section_validations: - errors = validate_config_section( - config_dict, config_class, section_name - ) - for error in errors: - all_errors.append(f"{config_file}: {error}") - - # Additional validation for GRPO configs that have an 'env' section - if "grpo" in config_dict and "env" in config_dict: - if not isinstance(config_dict["env"], dict): - all_errors.append( - f"{config_file}: env section must be a dictionary" - ) - - except Exception as e: - all_errors.append(f"Error processing {config_file}: {str(e)}") - - # If there are any errors, fail the test with detailed error messages - if all_errors: - error_message = "\n".join(all_errors) - pytest.fail(f"Config validation failed:\n{error_message}") - - print(f"\n✅ Successfully validated {len(config_files)} config files") + # Validate the entire config using the appropriate MasterConfig + validate_config_section(config_dict, master_config_class, config_file) diff --git a/tests/unit/test_envs.py b/tests/unit/test_envs.py index d07bbfe65b..d71e08d964 100644 --- a/tests/unit/test_envs.py +++ b/tests/unit/test_envs.py @@ -100,6 +100,7 @@ def process_turn( bool, Optional[list[str]], Optional[MultiStepCalcMetadata], + Optional[list[str]], ]: """Processes a single turn for the multi-step calculator task.""" last_assistant_msg = "" @@ -129,6 +130,7 @@ def process_turn( is_terminated, None, next_metadata, + None, ) # Check for final answer first @@ -167,12 +169,15 @@ def process_turn( next_metadata = None next_observation = {"role": "environment", "content": next_observation_content} + # next_answer only assigned in the verify function + next_answer = None return ( next_observation, turn_reward, is_terminated, 
next_stop_strings, next_metadata, + next_answer, ) @@ -201,13 +206,15 @@ def step( terminateds = [] all_stop_strings = [] # List of Lists or Nones all_next_metadata = [] + all_answers = [] - for obs, rew, term, stops, meta in results: + for obs, rew, term, stops, meta, answ in results: observations.append(obs) # obs is already dict[str, str] rewards.append(rew) terminateds.append(term) all_stop_strings.append(stops) all_next_metadata.append(meta) + all_answers.append(answ) # Convert to tensors where needed rewards_tensor = torch.tensor(rewards, dtype=torch.float32) @@ -221,6 +228,7 @@ def step( next_stop_strings=all_stop_strings, rewards=rewards_tensor, terminateds=done_tensor, + answers=all_answers, ) def shutdown(self): diff --git a/tests/unit/test_recipes_and_test_suites.py b/tests/unit/test_recipes_and_test_suites.py index 47d1d2f45b..dbdf009dae 100644 --- a/tests/unit/test_recipes_and_test_suites.py +++ b/tests/unit/test_recipes_and_test_suites.py @@ -17,6 +17,9 @@ import pytest +# All tests in this module should run first +pytestmark = pytest.mark.run_first + dir_path = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.abspath(os.path.join(dir_path, "..", "..")) configs_dir = os.path.join(project_root, "examples", "configs") @@ -25,20 +28,24 @@ nightly_test_suite_path = os.path.join(test_suites_dir, "nightly.txt") release_test_suite_path = os.path.join(test_suites_dir, "release.txt") -nightly_performance_test_suite_path = os.path.join( - test_suites_dir, "nightly_performance.txt" -) -release_performance_test_suite_path = os.path.join( - test_suites_dir, "release_performance.txt" -) +performance_test_suite_path = os.path.join(test_suites_dir, "performance.txt") # Relative to project root ALGO_MAPPING_TO_BASE_YAML = { "sft": "examples/configs/sft.yaml", "dpo": "examples/configs/dpo.yaml", "grpo": "examples/configs/grpo_math_1B.yaml", + "vlm_grpo": "examples/configs/vlm_grpo_3B.yaml", + "distillation": "examples/configs/distillation_math.yaml", + 
"rm": "examples/configs/rm.yaml", + "dapo": "examples/configs/grpo_math_1B.yaml", } +# Configuration keys that are allowed to be added to base configs during testing +# These keys may exist in recipe configs but not in base configs, so we need to +# manually add them to avoid merge conflicts during config validation +ALLOWED_ADDITIONAL_CONFIG_KEYS = ["policy.generation.vllm_kwargs"] + @pytest.fixture def nightly_test_suite(): @@ -63,40 +70,23 @@ def release_test_suite(): @pytest.fixture -def nightly_performance_test_suite(): - nightly_performance_suite = [] - with open(nightly_performance_test_suite_path, "r") as f: - for line in f: - line = line.strip() - if line and not line.startswith("#"): - nightly_performance_suite.append(line) - return nightly_performance_suite - - -@pytest.fixture -def release_performance_test_suite(): - release_performance_suite = [] - with open(release_performance_test_suite_path, "r") as f: +def performance_test_suite(): + performance_suite = [] + with open(performance_test_suite_path, "r") as f: for line in f: line = line.strip() if line and not line.startswith("#"): - release_performance_suite.append(line) - return release_performance_suite + performance_suite.append(line) + return performance_suite @pytest.fixture def all_test_suites( nightly_test_suite, release_test_suite, - nightly_performance_test_suite, - release_performance_test_suite, + performance_test_suite, ): - return ( - nightly_test_suite - + release_test_suite - + nightly_performance_test_suite - + release_performance_test_suite - ) + return nightly_test_suite + release_test_suite + performance_test_suite @pytest.fixture @@ -114,14 +104,12 @@ def all_recipe_yaml_rel_paths(): [ nightly_test_suite_path, release_test_suite_path, - nightly_performance_test_suite_path, - release_performance_test_suite_path, + performance_test_suite_path, ], ids=[ "nightly_test_suite", "release_test_suite", - "nightly_performance_test_suite", - "release_performance_test_suite", + 
"performance_test_suite", ], ) def test_test_suites_exist(test_suite_path): @@ -182,7 +170,7 @@ def test_all_recipe_yamls_accounted_for_in_test_suites( ) -def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker): +def test_nightly_compute_stays_below_1040_hours(nightly_test_suite, tracker): command = f"DRYRUN=1 HF_HOME=... HF_DATASETS_CACHE=... CONTAINER= ACCOUNT= PARTITION= ./tools/launch {' '.join(nightly_test_suite)}" print(f"Running command: {command}") @@ -214,8 +202,8 @@ def test_nightly_compute_stays_below_1024_hours(nightly_test_suite, tracker): f"Last line of output was not as expected: '{last_line}'" ) total_gpu_hours = float(last_line.split(":")[-1].strip()) - assert total_gpu_hours <= 1024, ( - f"Total GPU hours exceeded 1024: {last_line}. We should revisit the test suites to reduce the total GPU hours." + assert total_gpu_hours <= 1040, ( + f"Total GPU hours exceeded 1040: {last_line}. We should revisit the test suites to reduce the total GPU hours." ) tracker.track("total_nightly_gpu_hours", total_gpu_hours) @@ -275,27 +263,3 @@ def test_all_recipes_start_with_algo_hyphen(all_recipe_yaml_rel_paths): assert algo in expected_algos, ( f"Recipe {recipe_yaml} has unexpected algo {algo}" ) - - -@pytest.mark.parametrize("algo, algo_base_yaml", ALGO_MAPPING_TO_BASE_YAML.items()) -def test_all_recipes_can_merge_configs_with_base_config( - all_recipe_yaml_rel_paths, all_test_suites, algo, algo_base_yaml -): - from omegaconf import OmegaConf - - base_yaml = os.path.join(project_root, algo_base_yaml) - base_config = OmegaConf.load(base_yaml) - # Would result in an error if we couldn't merge our config with the recipe's config - OmegaConf.set_struct(base_config, True) - for recipe_yaml in all_recipe_yaml_rel_paths: - if not os.path.basename(recipe_yaml).startswith(algo): - # Skipping here b/c we test that all recipes start with the algo-hyphen in - # test_all_recipes_start_with_algo_hyphen() - continue - recipe_yaml_path = 
os.path.join(recipes_dir, recipe_yaml) - recipe_config = OmegaConf.load(recipe_yaml_path) - OmegaConf.set_struct(recipe_config, True) - # This will raise a error if the config can't be merged - print(f"Merging {recipe_yaml} with {base_yaml}") - merged_config = OmegaConf.merge(base_config, recipe_config) - print(merged_config) diff --git a/tests/unit/test_transformer_memory_regression.py b/tests/unit/test_transformer_memory_regression.py new file mode 100644 index 0000000000..fffbd3e66d --- /dev/null +++ b/tests/unit/test_transformer_memory_regression.py @@ -0,0 +1,68 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import pytest +from packaging import version + + +def test_transformers_version_memory_regression(): + """ + Test that transformers version is within the safe range [4.54, 4.56). + + This test exists because of a memory regression in transformers>=4.54,<4.56 + where KV cache is incorrectly treated as trainable, causing significant memory + pressure with higher TP settings and long sequence lengths. + + If this test fails, it means: + - Either transformers has been upgraded to >=4.56 (good!) + - Or downgraded to <4.54 (unexpected) + + In either case, you should: + 1. Remove this test file (tests/unit/test_transformer_memory_regression.py) + 2. Reinstate the nightly test: tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh + 3. 
Update the GitHub issue: https://github.com/NVIDIA-NeMo/RL/issues/1343 + + Related upstream issue: https://github.com/huggingface/transformers/issues/39795 + """ + import transformers + + transformers_version = version.parse(transformers.__version__) + + # Expected range: >= 4.54 and < 4.56 + min_version = version.parse("4.54.0") + max_version = version.parse("4.56.0") + + is_in_expected_range = min_version <= transformers_version < max_version + + if not is_in_expected_range: + error_message = ( + f"\n{'=' * 80}\n" + f"Transformers version {transformers.__version__} is OUTSIDE the expected range [4.54, 4.56).\n" + f"\n" + f"This is GOOD NEWS if you've upgraded to >=4.56 (memory regression is fixed)!\n" + f"\n" + f"ACTION REQUIRED:\n" + f"1. Remove this test file: tests/unit/test_transformer_memory_regression.py\n" + f"2. Reinstate the nightly test that was disabled:\n" + f" tests/test_suites/llm/dpo-mistral-nemo-instruct-2407-1n8g-fsdp2tp8-actckpt-long.sh\n" + f"3. Update and close GitHub issue: https://github.com/NVIDIA-NeMo/RL/issues/1343\n" + f"\n" + f"Background: transformers [4.54, 4.56) had a memory regression where KV cache\n" + f"was incorrectly treated as trainable, causing OOMs with high TP and long sequences.\n" + f"See: https://github.com/huggingface/transformers/issues/39795\n" + f"{'=' * 80}\n" + ) + pytest.fail(error_message) + + # If we're in the expected range, the test passes silently + assert is_in_expected_range, "Transformers version should be in range [4.54, 4.56)" diff --git a/tests/unit/test_transformers_module_patching.py b/tests/unit/test_transformers_module_patching.py new file mode 100644 index 0000000000..6f1820e71c --- /dev/null +++ b/tests/unit/test_transformers_module_patching.py @@ -0,0 +1,189 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Tests for the transformers module directory patching functionality.""" + +import os +import tempfile +from unittest.mock import patch + +from nemo_rl import patch_transformers_module_dir + + +class TestPatchTransformersModuleDir: + """Test cases for the patch_transformers_module_dir function.""" + + def test_no_patching_when_hf_home_not_set(self): + """Test that patching is skipped when HF_HOME is not set.""" + env_vars = {"OTHER_VAR": "value"} + + # Ensure HF_HOME is not set + with patch.dict(os.environ, {}, clear=True): + result = patch_transformers_module_dir(env_vars) + + # Should return the same dict without modifications + assert result == {"OTHER_VAR": "value"} + assert "PYTHONPATH" not in result + + def test_patching_adds_pythonpath_when_not_present(self): + """Test that PYTHONPATH is added when it doesn't exist.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + os.makedirs(modules_dir) + + env_vars = {"OTHER_VAR": "value"} + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + # Should add PYTHONPATH with the modules directory + assert "PYTHONPATH" in result + assert result["PYTHONPATH"] == modules_dir + assert result["OTHER_VAR"] == "value" + + def test_patching_prepends_to_existing_pythonpath(self): + """Test that modules directory is prepended to existing PYTHONPATH.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + 
os.makedirs(modules_dir) + + existing_path = "/some/other/path" + env_vars = {"PYTHONPATH": existing_path} + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + # Should prepend modules_dir to existing PYTHONPATH + assert result["PYTHONPATH"] == f"{modules_dir}:{existing_path}" + + def test_patching_returns_early_when_modules_dir_not_exist(self): + """Test that function returns unchanged env_vars when modules directory doesn't exist.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Don't create the modules directory + env_vars = {"OTHER_VAR": "value"} + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + # Should return unchanged env_vars + assert result == {"OTHER_VAR": "value"} + assert "PYTHONPATH" not in result + + def test_patching_with_nested_hf_home(self): + """Test patching works with nested HF_HOME path.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create a nested HF_HOME structure + hf_home = os.path.join(tmpdir, "nested", "huggingface") + modules_dir = os.path.join(hf_home, "modules") + os.makedirs(modules_dir) + + env_vars = {} + + with patch.dict(os.environ, {"HF_HOME": hf_home}): + result = patch_transformers_module_dir(env_vars) + + assert result["PYTHONPATH"] == modules_dir + + def test_patching_does_not_modify_original_dict(self): + """Test that the function modifies the dictionary in place and returns it.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + os.makedirs(modules_dir) + + env_vars = {"OTHER_VAR": "value"} + original_id = id(env_vars) + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + # Should return the same object (modified in place) + assert id(result) == original_id + assert "PYTHONPATH" in result + assert result["PYTHONPATH"] == modules_dir + + def 
test_multiple_calls_with_same_env_vars(self): + """Test that calling the function multiple times with existing PYTHONPATH works correctly.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + os.makedirs(modules_dir) + + env_vars = {} + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + # First call + result1 = patch_transformers_module_dir(env_vars) + assert result1["PYTHONPATH"] == modules_dir + + # Second call with the already modified env_vars + result2 = patch_transformers_module_dir(result1) + # Should prepend again + assert result2["PYTHONPATH"] == f"{modules_dir}:{modules_dir}" + + def test_empty_env_vars_dict(self): + """Test that function works with an empty env_vars dictionary.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + os.makedirs(modules_dir) + + env_vars = {} + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + assert result == {"PYTHONPATH": modules_dir} + + def test_hf_home_with_trailing_slash(self): + """Test that function handles HF_HOME with trailing slash correctly.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, "modules") + os.makedirs(modules_dir) + + env_vars = {} + + # Add trailing slash to HF_HOME + hf_home_with_slash = tmpdir + "/" + + with patch.dict(os.environ, {"HF_HOME": hf_home_with_slash}): + result = patch_transformers_module_dir(env_vars) + + # os.path.join should handle the trailing slash correctly + expected_path = os.path.join(hf_home_with_slash, "modules") + assert result["PYTHONPATH"] == expected_path + + def test_preserves_other_env_vars(self): + """Test that function preserves other environment variables.""" + with tempfile.TemporaryDirectory() as tmpdir: + # Create the modules directory + modules_dir = os.path.join(tmpdir, 
"modules") + os.makedirs(modules_dir) + + env_vars = { + "VAR1": "value1", + "VAR2": "value2", + "VAR3": "value3", + } + + with patch.dict(os.environ, {"HF_HOME": tmpdir}): + result = patch_transformers_module_dir(env_vars) + + # All original vars should be preserved + assert result["VAR1"] == "value1" + assert result["VAR2"] == "value2" + assert result["VAR3"] == "value3" + assert result["PYTHONPATH"] == modules_dir diff --git a/tests/unit/tools/test_config_cli.py b/tests/unit/tools/test_config_cli.py new file mode 100644 index 0000000000..63af6c8294 --- /dev/null +++ b/tests/unit/tools/test_config_cli.py @@ -0,0 +1,280 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import importlib.util +import inspect +import os +from pathlib import Path +from textwrap import dedent +from typing import Any + +import pytest +from omegaconf import OmegaConf + + +def _load_cli_module() -> Any: + # Use a path relative to this test file to import tools/config_cli.py + test_file = Path(__file__).resolve() + repo_root = test_file.parents[3] + cli_path = repo_root / "tools" / "config_cli.py" + assert cli_path.exists(), f"Expected CLI at {cli_path}" + spec = importlib.util.spec_from_file_location("config_cli", str(cli_path)) + assert spec and spec.loader + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[arg-type] + return module + + +@pytest.fixture(scope="module") +def cli() -> Any: + return _load_cli_module() + + +def test__resolve_path_absolute_and_relative(cli: Any, tmp_path: Path) -> None: + base = tmp_path + # absolute input stays absolute + abs_in = "/etc/hosts" + assert str(cli.resolve_path(base, abs_in)) == abs_in + # relative input resolves against base + rel_in = "sub/dir/file.yaml" + expected = (base / rel_in).resolve() + assert cli.resolve_path(base, rel_in) == expected + + +def test__prune_equal_basic(cli: Any) -> None: + # Dict pruning: remove keys equal to base, keep differences + a = {"a": 1, "b": {"c": 2, "d": 3}} + b = {"a": 1, "b": {"c": 9, "d": 3}} + out = cli._prune_equal(a, b) + assert out == {"b": {"c": 2}} + + # List pruning: equal lists of same length return REMOVE sentinel + a_list = [1, 2, 3] + b_list = [1, 2, 3] + out_list = cli._prune_equal(a_list, b_list) + assert out_list is cli.REMOVE + + # Base-type equality returns REMOVE + assert cli._prune_equal(5, 5) is cli.REMOVE + # Different base-types keep original + assert cli._prune_equal(5, 6) == 5 + + +def test__ensure_defaults_relative_variants(cli: Any, tmp_path: Path) -> None: + base = tmp_path / "configs" / "base.yaml" + child = tmp_path / "recipes" / "child.yaml" + child.parent.mkdir(parents=True, exist_ok=True) + 
base.parent.mkdir(parents=True, exist_ok=True) + base.write_text("base: true\n") + child.write_text("child: true\n") + + # Case 1: no defaults in child + cfg: dict[str, Any] = {"child": True} + cli._ensure_defaults_relative(child, base, cfg) + rel = os.path.relpath(str(base), start=str(child.parent)) + assert cfg["defaults"] == rel + + # Case 2: defaults as string (ensure base inserted first if missing) + cfg2: dict[str, Any] = {"defaults": "something.yaml"} + cli._ensure_defaults_relative(child, base, cfg2) + val = cfg2["defaults"] + if isinstance(val, list): + assert val[0] == rel + else: + # collapsed to a string only if single element + assert val == rel or val == "something.yaml" + + # Case 3: defaults list, ensure base is present and order preserved otherwise + cfg3: dict[str, Any] = {"defaults": ["x.yaml", "y.yaml"]} + cli._ensure_defaults_relative(child, base, cfg3) + assert isinstance(cfg3["defaults"], list) + assert cfg3["defaults"][0] == rel + + +def test_minimize_in_place_and_check( + cli: Any, tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + base = tmp_path / "base.yaml" + child = tmp_path / "child.yaml" + base.write_text( + dedent( + """ + common: + a: 1 + list: [1, 2] + nested: + x: 0 + top_only: 7 + """ + ).strip() + ) + child.write_text( + dedent( + """ + defaults: parent.yaml + common: + a: 1 + list: [1, 2] + nested: + x: 1 + new_top: 42 + """ + ).strip() + ) + + # Before minimizing, check should fail + ns = type("NS", (), {"base": str(base), "config": str(child)}) + ret = cli.minimize_check(ns) + assert ret == 1 + err = capsys.readouterr().err + assert "Suggested fix" in err + + # Minimize in place + ns2 = type("NS", (), {"base": str(base), "config": str(child), "in_place": True}) + ret2 = cli.minimize(ns2) + assert ret2 == 0 + minimized = child.read_text().strip() + rel = os.path.relpath(str(base), start=str(child.parent)) + assert minimized.splitlines()[0].startswith("defaults:") + assert rel in minimized + # Ensure pruned keys 
are gone and differences stay + assert "top_only" not in minimized + assert "new_top" in minimized + assert "nested:\n x: 1" in minimized.replace( + "\r\n", "\n" + ) or "nested:\n x: 1" in minimized.replace("\r\n", "\n") + + # After minimizing, check should pass + ret3 = cli.minimize_check(ns) + assert ret3 == 0 + + +def test_expand_and_compare( + cli: Any, tmp_path: Path, capsys: pytest.CaptureFixture[str] +) -> None: + parent = tmp_path / "parent.yaml" + child = tmp_path / "child.yaml" + parent.write_text( + dedent( + """ + base_value: 10 + block: + a: 1 + b: 2 + """ + ).strip() + ) + child.write_text( + dedent( + """ + defaults: parent.yaml + base_value: 11 + block: + b: 3 + c: 4 + """ + ).strip() + ) + + # expand should merge without resolving interpolations; capture stdout + ns = type("NS", (), {"config": str(child), "in_place": False}) + ret = cli.expand(ns) + assert ret == 0 + out = capsys.readouterr().out + # Expect merged keys present + assert "base_value: 11" in out + assert "a: 1" in out and "b: 3" in out and "c: 4" in out + + # compare identical files prints identical message + ns_cmp = type("NS", (), {"left": str(child), "right": str(child)}) + ret_cmp = cli.compare(ns_cmp) + assert ret_cmp == 0 + out_cmp = capsys.readouterr().out + assert "Configs are identical" in out_cmp + + # compare different files prints sections: changed + alt = tmp_path / "alt.yaml" + alt.write_text( + dedent( + """ + defaults: parent.yaml + base_value: 12 + block: + a: 9 + b: 3 + d: 5 + """ + ).strip() + ) + ns_cmp2 = type("NS", (), {"left": str(child), "right": str(alt)}) + ret_cmp2 = cli.compare(ns_cmp2) + assert ret_cmp2 == 0 + out_cmp2 = capsys.readouterr().out + assert "Comparing configs" in out_cmp2 + assert "Added in Right" in out_cmp2 + assert "Changed (Left -> Right)" in out_cmp2 + + +def test_vendored_loader_behavior_matches_upstream(tmp_path: Path) -> None: + # Prepare simple parent/child config files + parent = tmp_path / "parent.yaml" + child = tmp_path / 
"child.yaml" + parent.write_text( + dedent( + """ + base: 1 + block: + a: 2 + b: 3 + """ + ).strip() + ) + child.write_text( + dedent( + """ + defaults: parent.yaml + base: 9 + block: + b: 7 + c: 4 + """ + ).strip() + ) + + # Use text-level expansion comparison by importing both implementations + # Vendored + cli = _load_cli_module() + vendored_cfg = cli.load_config_with_inheritance(str(child)) + vendored = OmegaConf.to_container(vendored_cfg) + + # Upstream via direct import; if it fails, the test should fail + import nemo_rl.utils.config as upstream + + upstream_cfg = upstream.load_config_with_inheritance(str(child)) + upstream_out = OmegaConf.to_container(upstream_cfg) + + assert vendored == upstream_out + + +def test_vendored_loader_drift_against_upstream_source() -> None: + # Enforce exact copy-paste: the vendored function's source must match upstream exactly + cli = _load_cli_module() + vendored_fn = cli.load_config_with_inheritance + + import nemo_rl.utils.config as upstream + + upstream_fn = upstream.load_config_with_inheritance + + up_src = inspect.getsource(upstream_fn).strip() + ven_src = inspect.getsource(vendored_fn).strip() + assert up_src == ven_src diff --git a/tests/unit/utils/test_automodel_checkpoint.py b/tests/unit/utils/test_automodel_checkpoint.py new file mode 100644 index 0000000000..9906a1522f --- /dev/null +++ b/tests/unit/utils/test_automodel_checkpoint.py @@ -0,0 +1,420 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from tempfile import TemporaryDirectory +from unittest.mock import MagicMock, patch + +import pytest +import torch + +# Skip entire module if nemo_automodel is not available +pytest_plugins = [] +try: + import nemo_automodel # noqa: F401 +except ImportError: + pytest.skip("nemo_automodel not available", allow_module_level=True) + +from nemo_rl.utils.automodel_checkpoint import ( + detect_checkpoint_format, + load_checkpoint, + save_checkpoint, +) + + +class TestModel(torch.nn.Module): + """Simple test model with a forward method.""" + + def __init__(self): + super().__init__() + self.layers = torch.nn.ModuleList( + [ + torch.nn.Linear(4, 4), + torch.nn.LayerNorm(4), + torch.nn.ReLU(), + torch.nn.Linear(4, 1), + ] + ) + + def forward(self, x): + for layer in self.layers: + x = layer(x) + return x + + +@pytest.fixture +def mock_model(): + """Create a simple mock model for testing.""" + return TestModel() + + +@pytest.fixture +def mock_optimizer(): + """Create a simple mock optimizer for testing.""" + model = torch.nn.Linear(4, 1) + return torch.optim.Adam(model.parameters()) + + +@pytest.mark.automodel +class TestDetectCheckpointFormat: + """Test the detect_checkpoint_format function.""" + + def test_directory_with_safetensors(self): + """Test detection for directories containing safetensors files.""" + with TemporaryDirectory() as tmp_dir: + # Create directory with safetensors files + os.makedirs(os.path.join(tmp_dir, "weights", "model")) + weights_path = os.path.join(tmp_dir, "weights", "model") + + # Create safetensors shard files + with open( + os.path.join( + weights_path, "shard-00001-model-00001-of-00001.safetensors" + ), + "w", + ) as f: + f.write("dummy content") + with open( + os.path.join( + weights_path, "shard-00002-model-00001-of-00001.safetensors" + ), + "w", + ) as f: + f.write("dummy content") + + format_type, is_peft = 
detect_checkpoint_format(weights_path) + assert format_type == "safetensors" + assert is_peft == False + + def test_directory_with_dcp_format(self): + """Test detection for directories with DCP (Distributed Checkpoint) format.""" + with TemporaryDirectory() as tmp_dir: + # Create directory structure like: step_3/policy/optimizer/optim + optim_path = os.path.join(tmp_dir, "step_3", "policy", "optimizer", "optim") + os.makedirs(optim_path) + + # Create DCP files (.distcp + .metadata) + with open(os.path.join(optim_path, "__0_0.distcp"), "w") as f: + f.write("dummy dcp content") + with open(os.path.join(optim_path, "__1_0.distcp"), "w") as f: + f.write("dummy dcp content") + with open(os.path.join(optim_path, ".metadata"), "w") as f: + f.write("dummy metadata") + + format_type, is_peft = detect_checkpoint_format(optim_path) + assert format_type == "torch_save" # DCP uses torch_save format + assert is_peft == False + + def test_directory_with_torch_files(self): + """Test detection for directories containing torch save files.""" + with TemporaryDirectory() as tmp_dir: + model_path = os.path.join(tmp_dir, "model") + os.makedirs(model_path) + + # Create torch save files + with open(os.path.join(model_path, "pytorch_model.bin"), "w") as f: + f.write("dummy content") + + format_type, is_peft = detect_checkpoint_format(model_path) + assert format_type == "torch_save" + assert is_peft == False + + def test_peft_detection_in_filenames(self): + """Test PEFT detection from filenames within directories.""" + with TemporaryDirectory() as tmp_dir: + model_path = os.path.join(tmp_dir, "regular_model") + os.makedirs(model_path) + + # Create file with adapter pattern in name + with open(os.path.join(model_path, "adapter_model.safetensors"), "w") as f: + f.write("dummy content") + + format_type, is_peft = detect_checkpoint_format(model_path) + assert format_type == "safetensors" + assert is_peft == True # Should detect adapter in filename + + def test_default_fallback(self): + """Test 
default behavior for non-existent directories.""" + # Non-existent directory should default to safetensors, no PEFT + format_type, is_peft = detect_checkpoint_format("/non/existent/directory") + assert format_type == "safetensors" + assert is_peft == False + + def test_expected_structure(self): + """Test with the expected folder structure from the user.""" + with TemporaryDirectory() as tmp_dir: + # Create the expected structure: step_3/policy/weights/model + weights_path = os.path.join(tmp_dir, "step_3", "policy", "weights", "model") + os.makedirs(weights_path) + + # Create safetensors shard files as in the example + with open( + os.path.join( + weights_path, "shard-00001-model-00001-of-00001.safetensors" + ), + "w", + ) as f: + f.write("dummy content") + with open( + os.path.join( + weights_path, "shard-00002-model-00001-of-00001.safetensors" + ), + "w", + ) as f: + f.write("dummy content") + + format_type, is_peft = detect_checkpoint_format(weights_path) + assert format_type == "safetensors" + assert is_peft == False + + """Test the save_checkpoint function.""" + + @pytest.mark.automodel + @patch("nemo_rl.utils.automodel_checkpoint.save_model") + @patch("nemo_rl.utils.automodel_checkpoint.save_optimizer") + def test_save_model_only(self, mock_save_optimizer, mock_save_model, mock_model): + """Test saving model weights only.""" + with TemporaryDirectory() as tmp_dir: + weights_path = os.path.join(tmp_dir, "weights") + os.makedirs(os.path.dirname(weights_path), exist_ok=True) + + # Save checkpoint + save_checkpoint( + model=mock_model, + weights_path=weights_path, + model_save_format="safetensors", + is_peft=False, + ) + + # Verify save_model was called correctly + mock_save_model.assert_called_once() + call_args = mock_save_model.call_args + assert call_args[1]["model"] is mock_model + assert call_args[1]["weights_path"] == weights_path + assert ( + call_args[1]["checkpoint_config"].model_save_format.value + == "safetensors" + ) + assert 
call_args[1]["checkpoint_config"].is_peft == False + + # Verify optimizer saving was not called + mock_save_optimizer.assert_not_called() + + @pytest.mark.automodel + @patch("nemo_rl.utils.automodel_checkpoint.save_model") + @patch("nemo_rl.utils.automodel_checkpoint.save_optimizer") + def test_save_with_optimizer( + self, mock_save_optimizer, mock_save_model, mock_model, mock_optimizer + ): + """Test saving model and optimizer weights.""" + with TemporaryDirectory() as tmp_dir: + weights_path = os.path.join(tmp_dir, "model", "weights") + optimizer_path = os.path.join(tmp_dir, "optimizer", "optim") + os.makedirs(os.path.dirname(weights_path)) + os.makedirs(os.path.dirname(optimizer_path)) + + # Save checkpoint with optimizer + save_checkpoint( + model=mock_model, + weights_path=weights_path, + optimizer=mock_optimizer, + optimizer_path=optimizer_path, + model_save_format="torch_save", + is_peft=True, + ) + + # Verify both model and optimizer saving were called + mock_save_model.assert_called_once() + mock_save_optimizer.assert_called_once() + + # Check optimizer call args + opt_call_args = mock_save_optimizer.call_args + assert opt_call_args[1]["optimizer"] is mock_optimizer + assert opt_call_args[1]["model"] is mock_model + assert opt_call_args[1]["weights_path"] == optimizer_path + + @pytest.mark.automodel + @patch("nemo_rl.utils.automodel_checkpoint.save_model") + def test_save_with_tokenizer(self, mock_save_model, mock_model): + """Test saving with tokenizer.""" + with TemporaryDirectory() as tmp_dir: + weights_path = os.path.join(tmp_dir, "model", "weights") + tokenizer_path = os.path.join(tmp_dir, "tokenizer") + os.makedirs(os.path.dirname(weights_path)) + os.makedirs(tokenizer_path) + + # Create mock tokenizer + mock_tokenizer = MagicMock() + + # Save checkpoint with tokenizer + save_checkpoint( + model=mock_model, + weights_path=weights_path, + tokenizer=mock_tokenizer, + tokenizer_path=tokenizer_path, + ) + + # Verify tokenizer.save_pretrained was called + 
mock_tokenizer.save_pretrained.assert_called_once_with(tokenizer_path) + + +@pytest.fixture +def mock_experiment(): + """Create a real model, optimizer, and scheduler for integration testing.""" + model = TestModel() + optimizer = torch.optim.Adam(model.parameters(), lr=0.001) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1) + return model, optimizer, scheduler + + +def check_dict_equality(dict1, dict2): + """Recursively check equality of two dictionaries""" + for k in dict1.keys(): + if isinstance(dict1[k], dict): + check_dict_equality(dict1[k], dict2[k]) + elif isinstance(dict1[k], torch.Tensor): + assert torch.allclose(dict1[k], dict2[k]) + else: + assert dict1[k] == dict2[k] + + +@pytest.mark.automodel +class TestSaveLoadIntegration: + """Integration tests that actually save and load checkpoints.""" + + def test_save_and_load_model_only_safetensors(self, mock_experiment): + """Test saving and loading model weights only with safetensors format.""" + test_model, _, _ = mock_experiment + original_state_dict = test_model.state_dict() + + with TemporaryDirectory() as tmp_dir: + weights_path = os.path.join(tmp_dir, "test_model") + + # Save checkpoint + save_checkpoint( + model=test_model, + weights_path=weights_path, + model_save_format="safetensors", + ) + + # Verify files are created + assert os.path.exists(weights_path) + files = os.listdir(os.path.join(weights_path, "model")) + assert any(f.endswith(".safetensors") for f in files) + + # Create a new model with different weights + new_model = TestModel() + # Initialize with different values + for param in new_model.parameters(): + param.data.fill_(999.0) + + # Load the checkpoint + load_checkpoint(model=new_model, weights_path=weights_path) + + # Verify the weights match the original + check_dict_equality(new_model.state_dict(), original_state_dict) + + def test_save_and_load_model_only_torch_save(self, mock_experiment): + """Test saving and loading model weights only with 
torch_save format.""" + test_model, _, _ = mock_experiment + original_state_dict = test_model.state_dict() + + with TemporaryDirectory() as tmp_dir: + weights_path = os.path.join(tmp_dir, "test_model") + + # Save checkpoint + save_checkpoint( + model=test_model, + weights_path=weights_path, + model_save_format="torch_save", + ) + + # Verify files are created + assert os.path.exists(weights_path) + files = os.listdir(os.path.join(weights_path, "model")) + assert any(f.endswith(".distcp") for f in files) + + # Create a new model with different weights + new_model = TestModel() + # Initialize with different values + for param in new_model.parameters(): + param.data.fill_(999.0) + + # Load the checkpoint + load_checkpoint(model=new_model, weights_path=weights_path) + + # Verify the weights match the original + check_dict_equality(new_model.state_dict(), original_state_dict) + + def test_save_and_load_model_and_optimizer(self, mock_experiment): + """Test saving and loading both model and optimizer.""" + test_model, optimizer, scheduler = mock_experiment + + # Take some optimization steps to change optimizer state + for _ in range(5): + loss = torch.nn.functional.mse_loss( + test_model(torch.randn(2, 4)), torch.randn(2, 1) + ) + optimizer.zero_grad() + loss.backward() + optimizer.step() + scheduler.step() + + original_model_state = test_model.state_dict() + original_optimizer_state = optimizer.state_dict() + original_scheduler_state = scheduler.state_dict() + + with TemporaryDirectory() as tmp_dir: + model_path = os.path.join(tmp_dir, "model_and_optimizer", "model_path") + optimizer_path = os.path.join(tmp_dir, "model_and_optimizer", "optimizer") + os.makedirs(os.path.dirname(model_path), exist_ok=True) + os.makedirs(os.path.dirname(optimizer_path), exist_ok=True) + + # Save checkpoint + save_checkpoint( + model=test_model, + weights_path=model_path, + optimizer=optimizer, + scheduler=scheduler, + optimizer_path=optimizer_path, + ) + + # Verify files are created + assert 
os.path.exists(model_path) + assert os.path.exists(optimizer_path) + + # Create new model, optimizer, and scheduler with different state + new_model = TestModel() + new_optimizer = torch.optim.Adam(new_model.parameters(), lr=0.001) + new_scheduler = torch.optim.lr_scheduler.StepLR( + new_optimizer, step_size=4, gamma=0.2 + ) + + # Initialize with different values + for param in new_model.parameters(): + param.data.fill_(999.0) + + # Load the checkpoint + load_checkpoint( + model=new_model, + weights_path=model_path, + optimizer=new_optimizer, + scheduler=new_scheduler, + optimizer_path=optimizer_path, + ) + + # Verify all states match the original + check_dict_equality(new_model.state_dict(), original_model_state) + check_dict_equality(new_optimizer.state_dict(), original_optimizer_state) + assert new_scheduler.state_dict() == original_scheduler_state diff --git a/tests/unit/utils/test_checkpoint.py b/tests/unit/utils/test_checkpoint.py index c5a90c7932..002524cc71 100644 --- a/tests/unit/utils/test_checkpoint.py +++ b/tests/unit/utils/test_checkpoint.py @@ -141,6 +141,85 @@ def test_remove_old_checkpoints_topk_bias_recent_if_equal( assert sorted(remaining_steps) == sorted(expected_steps) +def test_remove_old_checkpoints_topk_some_missing_val_metric( + checkpoint_manager, checkpoint_dir +): + # Create checkpoints where some have validation metrics and others don't + steps = [1, 2, 3, 4, 10, 11, 12] + # Some checkpoints have loss metrics, others don't have any validation metrics + training_infos = [ + {"loss": 0.5}, # step 1 - has loss + {"loss": 0.3}, # step 2 - has loss + {"other_metric": 0.8}, # step 3 - missing loss metric + {"loss": 0.2}, # step 4 - has loss + {}, # step 10 - missing loss metric + {"loss": 1.0}, # has loss but not in top-k + {}, # step 12 - missing loss (latest) + ] + + for step, training_info in zip(steps, training_infos): + tmp_dir = checkpoint_manager.init_tmp_checkpoint(step, training_info) + checkpoint_manager.finalize_checkpoint(tmp_dir) 
+ + # Check if only top-k checkpoints are kept + remaining_dirs = list(checkpoint_dir.glob("step_*")) + assert ( + len(remaining_dirs) == checkpoint_manager.keep_top_k + 1 + ) # +1 because we exclude the latest + + # Checkpoints with missing validation metrics should be treated as having the worst possible value + # Since higher_is_better=False, missing metrics get float("inf") which is worst + # So checkpoints with actual loss values should be preferred over those without + remaining_steps = [] + for dir_path in remaining_dirs: + step_num = int(dir_path.name.split("_")[1]) + remaining_steps.append(step_num) + + # Should keep checkpoints with actual loss values (steps 1, 2, 4, 12) + # and exclude those without loss metrics (steps 3, 10) + # The latest checkpoint (step 12) is always kept + expected_steps = [1, 2, 4, 12] # Steps with loss metrics, plus latest + assert sorted(remaining_steps) == sorted(expected_steps) + + +def test_remove_old_checkpoints_topk_most_missing_val_metric( + checkpoint_manager, checkpoint_dir +): + # Create checkpoints where some have validation metrics and others don't + steps = [1, 2, 3, 4, 10, 12] + # Some checkpoints have loss metrics, others don't have any validation metrics + training_infos = [ + {"loss": 0.2}, # step 1 - has loss + {}, # step 2 - missing loss metric + {"other_metric": 0.8}, # step 3 - missing loss metric + {}, # step 4 - missing loss metric + {}, # step 10 - missing loss metric + {}, # step 12 - missing loss (latest) + ] + + for step, training_info in zip(steps, training_infos): + tmp_dir = checkpoint_manager.init_tmp_checkpoint(step, training_info) + checkpoint_manager.finalize_checkpoint(tmp_dir) + + # Check if only top-k checkpoints are kept + remaining_dirs = list(checkpoint_dir.glob("step_*")) + assert len(remaining_dirs) == checkpoint_manager.keep_top_k + + # Checkpoints with missing validation metrics should be treated as having the worst possible value + # Since higher_is_better=False, missing metrics get float("inf") which is 
worst + # So checkpoints with actual loss values should be preferred over those without + remaining_steps = [] + for dir_path in remaining_dirs: + step_num = int(dir_path.name.split("_")[1]) + remaining_steps.append(step_num) + + # Should keep checkpoints with actual loss values (step 1) + # followed by the most recent steps + # The latest checkpoint (step 12) is always kept + expected_steps = [1, 10, 12] # Steps with loss metrics, plus latest + assert sorted(remaining_steps) == sorted(expected_steps) + + def test_get_best_checkpoint_path(checkpoint_manager, checkpoint_dir): # Create multiple checkpoints with different loss values steps = [1, 2, 3] @@ -176,6 +255,24 @@ def test_get_latest_checkpoint_path(checkpoint_manager, checkpoint_dir): assert Path(latest_path).name == f"step_{max(steps)}" +def test_get_latest_checkpoint_path_with_suffixes(checkpoint_manager, checkpoint_dir): + """Test that having step_*-hf dirs alongside step_* checkpoints doesn't crash.""" + # Create a checkpoint + step = 1 + training_info = {"loss": 0.5} + tmp_dir = checkpoint_manager.init_tmp_checkpoint(step, training_info) + checkpoint_manager.finalize_checkpoint(tmp_dir) + + # Create pseudo-converted checkpoint folder + (checkpoint_dir / "step_1-hf").mkdir() + + # Get latest checkpoint path + latest_path = checkpoint_manager.get_latest_checkpoint_path() + + # Verify the -hf suffix didn't affect the get_latest_checkpoint func + assert Path(latest_path).name == "step_1" + + def test_load_training_metadata(checkpoint_manager, checkpoint_dir): # Create a checkpoint step = 1 diff --git a/tests/unit/utils/test_flops_counter.py b/tests/unit/utils/test_flops_counter.py new file mode 100644 index 0000000000..51b9d0bc5f --- /dev/null +++ b/tests/unit/utils/test_flops_counter.py @@ -0,0 +1,43 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from nemo_rl.utils.flops_tracker import FLOPTracker, get_default_hf_config + + +@pytest.mark.parametrize( + "model_name, gbs, seqlen, expected_flops", + [ + ("meta-llama/Llama-2-7b-hf", 128, 4096, 2.25e16), + ("meta-llama/Llama-2-13b-hf", 128, 4096, 4.17e16), + ("meta-llama/Llama-2-70b-hf", 128, 4096, 2.25e17), + ("meta-llama/Meta-Llama-3-8B", 128, 8192, 5.31e16), + ("meta-llama/Llama-3.1-70B-Instruct", 128, 8192, 4.71e17), + ("meta-llama/Llama-3.1-405B-Instruct", 128, 8192, 2.65e18), + ("Qwen/Qwen3-30B-A3B", 128, 4096, 9.37e15), + ("Qwen/Qwen3-235B-A22B", 128, 4096, 6.21e16), + ("deepseek-ai/DeepSeek-V3", 1, 4096, 1.023e15), + ("moonshotai/Moonlight-16B-A3B-Instruct", 1, 4096, 6.45e13), + ], +) +def test_flops_counter(model_name, gbs, seqlen, expected_flops): + model_config = get_default_hf_config(model_name) + flops_tracker = FLOPTracker.from_config(model_name, model_config) + flops_tracker.track(gbs, seqlen) + + # check within 5% relative difference + assert abs(flops_tracker.total_flops - expected_flops) / expected_flops <= 0.05, ( + f"Expected {expected_flops} flops, got {flops_tracker.total_flops}" + ) diff --git a/tests/unit/utils/test_logger.py b/tests/unit/utils/test_logger.py index e61120f312..c9771cea0c 100644 --- a/tests/unit/utils/test_logger.py +++ b/tests/unit/utils/test_logger.py @@ -23,9 +23,11 @@ Logger, MLflowLogger, RayGpuMonitorLogger, + SwanlabLogger, TensorboardLogger, WandbLogger, flatten_dict, + print_message_log_samples, ) @@ -263,6 +265,142 @@ def test_log_hyperparams(self, mock_wandb): 
mock_run.config.update.assert_called_once_with(params) +class TestSwanlabLogger: + """Test the SwanlabLogger class.""" + + @pytest.fixture + def temp_dir(self): + """Create a temporary directory for logs.""" + temp_dir = tempfile.mkdtemp() + yield temp_dir + shutil.rmtree(temp_dir) + + @patch("nemo_rl.utils.logger.swanlab") + def test_init_custom_config(self, mock_swanlab, temp_dir): + """Test initialization of SwanlabLogger with custom config.""" + cfg = { + "project": "custom-project", + "name": "custom-run", + "entity": "custom-entity", + "group": "custom-group", + "tags": ["tag1", "tag2"], + } + SwanlabLogger(cfg, log_dir=temp_dir) + + mock_swanlab.init.assert_called_once_with( + project="custom-project", + name="custom-run", + entity="custom-entity", + group="custom-group", + tags=["tag1", "tag2"], + logdir=temp_dir, + ) + + @patch("nemo_rl.utils.logger.swanlab") + def test_log_metrics(self, mock_swanlab): + """Test logging metrics to SwanlabLogger.""" + cfg = {} + logger = SwanlabLogger(cfg) + + metrics = {"loss": 0.5, "accuracy": 0.8} + step = 10 + logger.log_metrics(metrics, step) + + # Check that log was called with metrics and step + mock_run = mock_swanlab.init.return_value + mock_run.log.assert_called_once_with(metrics, step=step) + + @patch("nemo_rl.utils.logger.swanlab") + def test_log_metrics_with_prefix(self, mock_swanlab): + """Test logging metrics with a prefix to SwanlabLogger.""" + cfg = {} + logger = SwanlabLogger(cfg) + + metrics = {"loss": 0.5, "accuracy": 0.8} + step = 10 + prefix = "train" + logger.log_metrics(metrics, step, prefix) + + # Check that log was called with prefixed metrics and step + mock_run = mock_swanlab.init.return_value + expected_metrics = {"train/loss": 0.5, "train/accuracy": 0.8} + mock_run.log.assert_called_once_with(expected_metrics, step=step) + + @patch("nemo_rl.utils.logger.swanlab") + def test_log_metrics_with_step_metric(self, mock_swanlab): + """Test logging metrics with a step metric to SwanlabLogger.""" + cfg 
= {} + logger = SwanlabLogger(cfg) + + # Define step metric + step_metric = "iteration" + + # Include the step metric in the metrics + metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15} + step = 10 # This should be ignored when step_metric is provided + + logger.log_metrics(metrics, step, step_metric=step_metric) + + # Check that log was called with metrics and commit=False + # When using step_metric, step should be ignored and commit=False should be used + mock_run = mock_swanlab.init.return_value + mock_run.log.assert_called_once_with(metrics, commit=False) + + @patch("nemo_rl.utils.logger.swanlab") + def test_log_metrics_with_prefix_and_step_metric(self, mock_swanlab): + """Test logging metrics with both prefix and step metric.""" + cfg = {} + logger = SwanlabLogger(cfg) + + # Define prefix and step metric + prefix = "train" + step_metric = "train/iteration" + + # Include the step metric in the metrics + metrics = {"loss": 0.5, "accuracy": 0.8, "iteration": 15} + step = 10 # This should be ignored when step_metric is provided + + logger.log_metrics(metrics, step, prefix=prefix, step_metric=step_metric) + + # Check that log was called with prefixed metrics and commit=False + # The step_metric key gets prefixed based on the current implementation + mock_run = mock_swanlab.init.return_value + expected_metrics = { + "train/loss": 0.5, + "train/accuracy": 0.8, + "train/iteration": 15, + } + mock_run.log.assert_called_once_with(expected_metrics, commit=False) + + @patch("nemo_rl.utils.logger.swanlab") + def test_define_metric(self, mock_swanlab): + """Test defining a metric with a custom step metric.""" + cfg = {} + logger = SwanlabLogger(cfg) + + # Define metric pattern and step metric + logger.define_metric("ray/*", step_metric="ray/ray_step") + + # Check that define_metric was called + mock_run = mock_swanlab.init.return_value + mock_run.define_metric.assert_called_once_with( + "ray/*", step_metric="ray/ray_step" + ) + + 
@patch("nemo_rl.utils.logger.swanlab") + def test_log_hyperparams(self, mock_swanlab): + """Test logging hyperparameters to SwanlabLogger.""" + cfg = {} + logger = SwanlabLogger(cfg) + + params = {"lr": 0.001, "batch_size": 32, "model": {"hidden_size": 128}} + logger.log_hyperparams(params) + + # Check that config.update was called with params + mock_run = mock_swanlab.init.return_value + mock_run.config.update.assert_called_once_with(params) + + class TestMLflowLogger: """Test the MLflowLogger class.""" @@ -410,6 +548,101 @@ def test_cleanup(self, mock_mlflow, temp_dir): # Check that end_run was called mock_mlflow.end_run.assert_called_once() + @patch("nemo_rl.utils.logger.mlflow") + def test_init_with_none_log_dir(self, mock_mlflow): + """Test initialization with None log_dir uses server default artifact location.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + } + mock_mlflow.get_experiment_by_name.return_value = None + + MLflowLogger(cfg, log_dir=None) + + # Verify create_experiment was called without artifact_location + mock_mlflow.create_experiment.assert_called_once_with(name="test-experiment") + mock_mlflow.start_run.assert_called_once_with(run_name="test-run") + + @patch("nemo_rl.utils.logger.mlflow") + def test_init_with_custom_log_dir(self, mock_mlflow): + """Test initialization with custom log_dir sets artifact_location.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + } + mock_mlflow.get_experiment_by_name.return_value = None + + MLflowLogger(cfg, log_dir="/custom/path") + + # Verify create_experiment was called with artifact_location + mock_mlflow.create_experiment.assert_called_once_with( + name="test-experiment", artifact_location="/custom/path" + ) + mock_mlflow.start_run.assert_called_once_with(run_name="test-run") + + @patch("nemo_rl.utils.logger.mlflow") + def 
test_init_with_artifact_location_in_config(self, mock_mlflow): + """Test initialization with artifact_location in config takes precedence over log_dir.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + "artifact_location": "/config/artifact/path", + } + mock_mlflow.get_experiment_by_name.return_value = None + + MLflowLogger(cfg, log_dir="/fallback/path") + + # Verify create_experiment was called with artifact_location from config + mock_mlflow.create_experiment.assert_called_once_with( + name=cfg["experiment_name"], artifact_location=cfg["artifact_location"] + ) + mock_mlflow.set_tracking_uri.assert_called_once_with(cfg["tracking_uri"]) + mock_mlflow.start_run.assert_called_once_with(run_name=cfg["run_name"]) + + @patch("nemo_rl.utils.logger.mlflow") + def test_init_with_artifact_location_none_in_config(self, mock_mlflow): + """Test initialization with artifact_location=None in config uses server default.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + "artifact_location": None, + } + mock_mlflow.get_experiment_by_name.return_value = None + + MLflowLogger(cfg, log_dir="/fallback/path") + + # Verify create_experiment was called without artifact_location + # (None is explicitly set, so we don't pass it to MLflow) + mock_mlflow.create_experiment.assert_called_once_with( + name=cfg["experiment_name"], artifact_location=cfg["artifact_location"] + ) + mock_mlflow.set_tracking_uri.assert_called_once_with(cfg["tracking_uri"]) + mock_mlflow.start_run.assert_called_once_with(run_name=cfg["run_name"]) + + @patch("nemo_rl.utils.logger.mlflow") + def test_init_without_artifact_location_uses_log_dir(self, mock_mlflow): + """Test initialization without artifact_location in config uses log_dir.""" + cfg = { + "experiment_name": "test-experiment", + "run_name": "test-run", + "tracking_uri": "http://localhost:5000", + } + 
mock_mlflow.get_experiment_by_name.return_value = None + + log_dir = "/fallback/path" + MLflowLogger(cfg, log_dir=log_dir) + + # Verify create_experiment was called with log_dir as artifact_location + mock_mlflow.create_experiment.assert_called_once_with( + name=cfg["experiment_name"], artifact_location=log_dir + ) + mock_mlflow.set_tracking_uri.assert_called_once_with(cfg["tracking_uri"]) + mock_mlflow.start_run.assert_called_once_with(run_name=cfg["run_name"]) + class TestRayGpuMonitorLogger: """Test the RayGpuMonitorLogger class.""" @@ -918,6 +1151,7 @@ def test_init_with_gpu_monitoring( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -964,6 +1198,7 @@ def test_gpu_monitoring_without_wandb( "wandb_enabled": False, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -1001,6 +1236,7 @@ def test_gpu_monitoring_no_main_loggers( """Test GPU monitoring initialization when no main loggers (wandb/tensorboard) are enabled.""" cfg = { "wandb_enabled": False, + "swanlab_enabled": False, "tensorboard_enabled": False, "mlflow_enabled": False, "monitor_gpus": True, @@ -1055,6 +1291,7 @@ def test_init_no_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": False, "tensorboard_enabled": False, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "log_dir": temp_dir, } @@ -1072,6 +1309,7 @@ def test_init_wandb_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": False, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "log_dir": temp_dir, @@ -1084,6 +1322,28 @@ def test_init_wandb_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): assert wandb_cfg == {"project": "test-project"} 
mock_tb_logger.assert_not_called() + @patch("nemo_rl.utils.logger.WandbLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") + @patch("nemo_rl.utils.logger.TensorboardLogger") + def test_init_swanlab_only(self, mock_tb_logger, mock_swanlab_logger, temp_dir): + """Test initialization with only SwanlabLogger enabled.""" + cfg = { + "wandb_enabled": False, + "tensorboard_enabled": False, + "mlflow_enabled": False, + "swanlab_enabled": True, + "monitor_gpus": False, + "swanlab": {"project": "test-project"}, + "log_dir": temp_dir, + } + logger = Logger(cfg) + + assert len(logger.loggers) == 1 + mock_swanlab_logger.assert_called_once() + swanlab_cfg = mock_swanlab_logger.call_args[0][0] + assert swanlab_cfg == {"project": "test-project"} + mock_tb_logger.assert_not_called() + @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): @@ -1092,6 +1352,7 @@ def test_init_tensorboard_only(self, mock_tb_logger, mock_wandb_logger, temp_dir "wandb_enabled": False, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "tensorboard": {"log_dir": "test_logs"}, "log_dir": temp_dir, @@ -1112,6 +1373,7 @@ def test_init_both_loggers(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1136,6 +1398,7 @@ def test_log_metrics(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1163,6 +1426,7 @@ def test_log_hyperparams(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": True, 
"tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1192,6 +1456,7 @@ def test_init_with_gpu_monitoring( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": True, "gpu_monitoring": { "collection_interval": 15.0, @@ -1237,6 +1502,7 @@ def test_log_metrics_with_prefix_and_step_metric( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1275,6 +1541,7 @@ def test_log_plot_token_mult_prob_error( "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": False, + "swanlab_enabled": False, "monitor_gpus": False, "wandb": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, @@ -1329,6 +1596,7 @@ def test_init_mlflow_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): "wandb_enabled": False, "tensorboard_enabled": False, "mlflow_enabled": True, + "swanlab_enabled": False, "monitor_gpus": False, "mlflow": { "experiment_name": "test-experiment", @@ -1346,16 +1614,24 @@ def test_init_mlflow_only(self, mock_tb_logger, mock_wandb_logger, temp_dir): @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_init_all_loggers( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test initialization with all loggers enabled.""" cfg = { "wandb_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, + "swanlab_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": 
"test-project"}, "tensorboard": {"log_dir": "test_logs"}, "mlflow": { "experiment_name": "test-experiment", @@ -1366,24 +1642,33 @@ def test_init_all_loggers( } logger = Logger(cfg) - assert len(logger.loggers) == 3 + assert len(logger.loggers) == 4 mock_wandb_logger.assert_called_once() mock_tb_logger.assert_called_once() mock_mlflow_logger.assert_called_once() + mock_swanlab_logger.assert_called_once() @patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_log_metrics_with_mlflow( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test logging metrics to all enabled loggers including MLflow.""" cfg = { "wandb_enabled": True, + "swanlab_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, "mlflow": { "experiment_name": "test-experiment", @@ -1398,6 +1683,7 @@ def test_log_metrics_with_mlflow( mock_wandb_instance = mock_wandb_logger.return_value mock_tb_instance = mock_tb_logger.return_value mock_mlflow_instance = mock_mlflow_logger.return_value + mock_swanlab_instance = mock_swanlab_logger.return_value metrics = {"loss": 0.5, "accuracy": 0.8} step = 10 @@ -1405,6 +1691,9 @@ def test_log_metrics_with_mlflow( # Check that log_metrics was called on all loggers mock_wandb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) + mock_swanlab_instance.log_metrics.assert_called_once_with( + metrics, step, "", None + ) mock_tb_instance.log_metrics.assert_called_once_with(metrics, step, "", None) mock_mlflow_instance.log_metrics.assert_called_once_with( metrics, step, "", None @@ -1413,16 +1702,24 @@ def test_log_metrics_with_mlflow( 
@patch("nemo_rl.utils.logger.WandbLogger") @patch("nemo_rl.utils.logger.TensorboardLogger") @patch("nemo_rl.utils.logger.MLflowLogger") + @patch("nemo_rl.utils.logger.SwanlabLogger") def test_log_hyperparams_with_mlflow( - self, mock_mlflow_logger, mock_tb_logger, mock_wandb_logger, temp_dir + self, + mock_swanlab_logger, + mock_mlflow_logger, + mock_tb_logger, + mock_wandb_logger, + temp_dir, ): """Test logging hyperparameters to all enabled loggers including MLflow.""" cfg = { "wandb_enabled": True, + "swanlab_enabled": True, "tensorboard_enabled": True, "mlflow_enabled": True, "monitor_gpus": False, "wandb": {"project": "test-project"}, + "swanlab": {"project": "test-project"}, "tensorboard": {"log_dir": "test_logs"}, "mlflow": {"experiment_name": "test-experiment"}, "log_dir": temp_dir, @@ -1433,6 +1730,7 @@ def test_log_hyperparams_with_mlflow( mock_wandb_instance = mock_wandb_logger.return_value mock_tb_instance = mock_tb_logger.return_value mock_mlflow_instance = mock_mlflow_logger.return_value + mock_swanlab_instance = mock_swanlab_logger.return_value params = {"lr": 0.001, "batch_size": 32} logger.log_hyperparams(params) @@ -1441,3 +1739,24 @@ def test_log_hyperparams_with_mlflow( mock_wandb_instance.log_hyperparams.assert_called_once_with(params) mock_tb_instance.log_hyperparams.assert_called_once_with(params) mock_mlflow_instance.log_hyperparams.assert_called_once_with(params) + mock_swanlab_instance.log_hyperparams.assert_called_once_with(params) + + +def test_print_message_log_samples(capsys): + """Test that print_message_log_samples displays full content correctly.""" + # Test message with full content (verifies our bug fix) + message_logs = [ + [ + {"role": "user", "content": "What is 2+2?"}, + {"role": "assistant", "content": "2+2 = 4"}, + ] + ] + rewards = [1.0] + + print_message_log_samples(message_logs, rewards, num_samples=1, step=0) + + captured = capsys.readouterr() + # Verify content is displayed properly + assert "What is 2+2?" 
in captured.out + assert "2+2 = 4" in captured.out + assert "Sample 1 | Reward: 1.0000" in captured.out diff --git a/tests/unit/utils/test_native_checkpoint.py b/tests/unit/utils/test_native_checkpoint.py index 69493da3b3..380119f4e7 100755 --- a/tests/unit/utils/test_native_checkpoint.py +++ b/tests/unit/utils/test_native_checkpoint.py @@ -42,6 +42,7 @@ "logprob_batch_size": 1, "max_total_sequence_length": 1024, "precision": "float32", + "offload_optimizer_for_logprob": False, "optimizer": { "name": "torch.optim.AdamW", "kwargs": { @@ -130,17 +131,6 @@ def policy(cluster, tokenizer): policy.worker_group.shutdown() -@pytest.fixture(scope="module", autouse=True) -def skip_tied_weight_check_for_all(): - """Automatically skip tied weight check for all tests in this module.""" - os.environ["NRL_SKIP_TIED_WEIGHT_CHECK"] = "1" - - yield - - # Restore the original value - os.environ.pop("NRL_SKIP_TIED_WEIGHT_CHECK", None) - - def get_dummy_state_dict(state_dict, dummy_dict={}): """Recursively get the dummy state dict by replacing tensors with random ones of the same shape. diff --git a/tests/unit/utils/test_packed_tensor.py b/tests/unit/utils/test_packed_tensor.py new file mode 100644 index 0000000000..6d321bd32a --- /dev/null +++ b/tests/unit/utils/test_packed_tensor.py @@ -0,0 +1,199 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from unittest.mock import patch + +import pytest +import torch + +from nemo_rl.utils.packed_tensor import ( + packed_broadcast_consumer, + packed_broadcast_producer, +) + + +class MockCommunicationGroup: + """Mock communication group for testing broadcast operations.""" + + def __init__(self): + self.broadcasted_tensors = [] + self.broadcast_count = 0 + + def broadcast(self, tensor, src): + """Mock broadcast that stores the tensor for later verification.""" + # Store a copy of the tensor + self.broadcasted_tensors.append(tensor.clone()) + self.broadcast_count += 1 + + +class MockConsumerCommunicationGroup: + """Mock communication group for consumer that returns pre-stored tensors.""" + + def __init__(self, tensors_to_return): + self.tensors_to_return = tensors_to_return + self.current_index = 0 + + def broadcast(self, tensor, src): + """Mock broadcast that fills the tensor with pre-stored data.""" + if self.current_index < len(self.tensors_to_return): + tensor.copy_(self.tensors_to_return[self.current_index]) + self.current_index += 1 + + +def create_mock_model_params(): + """Create mock model parameters for testing.""" + params = [ + ("layer1.weight", torch.randn(10, 20, dtype=torch.float32)), + ("layer1.bias", torch.randn(10, dtype=torch.float32)), + ("layer2.weight", torch.randn(20, 30, dtype=torch.float32)), + ("layer2.bias", torch.randn(20, dtype=torch.float32)), + ("layer3.weight", torch.randn(30, 40, dtype=torch.float16)), + ] + return params + + +def create_mock_state_dict_info(params): + """Create state dict info (name -> (shape, dtype)) from params.""" + return {name: (tensor.shape, tensor.dtype) for name, tensor in params} + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +def test_packed_broadcast_producer_consumer_roundtrip(): + """Test that producer and consumer work together correctly.""" + # Create mock parameters + params = create_mock_model_params() + + # Move params to CUDA + params_cuda = [(name, 
tensor.cuda()) for name, tensor in params] + + # Create mock communication group for producer + producer_group = MockCommunicationGroup() + + # Mock the target size to force packing + target_size = 2000 + with patch( + "nemo_rl.utils.packed_tensor.get_target_packed_tensor_size", + return_value=target_size, + ): + # Post-iter function that just returns the tensor + post_iter_func = lambda x: x[1] + + # Run producer + packed_broadcast_producer( + iterator=iter(params_cuda), + group=producer_group, + src=0, + post_iter_func=post_iter_func, + ) + + # Now test consumer with the broadcasted tensors + consumer_group = MockConsumerCommunicationGroup( + producer_group.broadcasted_tensors + ) + + # Create state dict info for consumer + state_dict_info = create_mock_state_dict_info(params_cuda) + + # Store unpacked tensors + unpacked_tensors = {} + + def post_unpack_func(tensor_list): + """Store unpacked tensors for verification.""" + for name, tensor in tensor_list: + unpacked_tensors[name] = tensor + + # Run consumer + packed_broadcast_consumer( + iterator=iter(state_dict_info.items()), + group=consumer_group, + src=0, + post_unpack_func=post_unpack_func, + ) + + # Verify all parameters were unpacked + assert len(unpacked_tensors) == len(params) + + # Verify each tensor matches the original + for name, original_tensor in params_cuda: + assert name in unpacked_tensors + unpacked = unpacked_tensors[name] + + # Check shape and dtype + assert unpacked.shape == original_tensor.shape + assert unpacked.dtype == original_tensor.dtype + + # Check values are close (accounting for floating point precision) + assert torch.allclose(unpacked, original_tensor, rtol=1e-5, atol=1e-7) + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +def test_packed_broadcast_single_large_tensor(): + """Test with a single tensor larger than target size.""" + # Create a large tensor + large_tensor = torch.randn(1000, 1000, dtype=torch.float32).cuda() + params = 
[("large_weight", large_tensor)] + + # Create mock communication group + mock_group = MockCommunicationGroup() + + # Small target size to force the tensor to exceed it + with patch( + "nemo_rl.utils.packed_tensor.get_target_packed_tensor_size", return_value=100 + ): + packed_broadcast_producer( + iterator=iter(params), + group=mock_group, + src=0, + post_iter_func=lambda x: x[1], + ) + + # Should still broadcast the tensor + assert mock_group.broadcast_count == 1 + assert len(mock_group.broadcasted_tensors) == 1 + + # Verify the size matches the large tensor + expected_size = large_tensor.numel() * large_tensor.element_size() + assert mock_group.broadcasted_tensors[0].numel() == expected_size + + +@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +def test_packed_broadcast_multiple_batches(): + """Test that tensors are properly batched when exceeding target size.""" + # Create many small tensors + params = [ + (f"weight_{i}", torch.randn(10, 10, dtype=torch.float32).cuda()) + for i in range(20) + ] + + # Create mock communication group + mock_group = MockCommunicationGroup() + + # Small target size to force multiple batches + with patch( + "nemo_rl.utils.packed_tensor.get_target_packed_tensor_size", return_value=2000 + ): + packed_broadcast_producer( + iterator=iter(params), + group=mock_group, + src=0, + post_iter_func=lambda x: x[1], + ) + + # Should have multiple broadcasts + assert mock_group.broadcast_count > 1 + + # Total size should match sum of all tensors + total_broadcasted_size = sum(t.numel() for t in mock_group.broadcasted_tensors) + expected_total_size = sum(t.numel() * t.element_size() for _, t in params) + assert total_broadcasted_size == expected_total_size diff --git a/tests/unit/utils/test_timer.py b/tests/unit/utils/test_timer.py index 56ba315b55..041193b777 100644 --- a/tests/unit/utils/test_timer.py +++ b/tests/unit/utils/test_timer.py @@ -18,7 +18,7 @@ import numpy as np import pytest -from nemo_rl.utils.timer 
import Timer +from nemo_rl.utils.timer import TimeoutChecker, Timer class TestTimer: @@ -188,3 +188,48 @@ def test_precise_timing(self, mock_perf_counter, timer): # Check the elapsed time assert elapsed == 5.0 assert timer._timers["precise_test"][0] == 5.0 + + +class TestTimeoutChecker: + def test_infinite_timeout(self): + checker = TimeoutChecker(timeout=None) + time.sleep(0.1) + assert checker.check_save() is False + + def test_short_timeout(self): + checker = TimeoutChecker(timeout="00:00:00:01") + time.sleep(1.1) + assert checker.check_save() is True + + def test_double_save_prevented(self): + checker = TimeoutChecker(timeout="00:00:00:01") + time.sleep(1.1) + assert checker.check_save() is True + assert checker.check_save() is False + + def test_fit_last_save_time_enabled(self): + # Create a TimeoutChecker with a 3-second timeout and enable fit_last_save_time logic + checker = TimeoutChecker(timeout="00:00:00:03", fit_last_save_time=True) + checker.start_iterations() + + # Simulate 10 iterations, each taking about 0.1 seconds + # This builds up a stable average iteration time + for _ in range(10): + time.sleep(0.1) + checker.mark_iteration() + + # Wait an additional ~2.0 seconds so that: + # elapsed time + avg iteration time >= timeout (3 seconds) + time.sleep(2.0) + + result = checker.check_save() + # Assert that the checker triggers a save due to timeout + assert result is True + + def test_iteration_tracking(self): + checker = TimeoutChecker() + checker.start_iterations() + time.sleep(0.05) + checker.mark_iteration() + assert len(checker.iteration_times) == 1 + assert checker.iteration_times[0] > 0 diff --git a/tools/bisect-script.sh b/tools/bisect-script.sh new file mode 100755 index 0000000000..cf18bc2e78 --- /dev/null +++ b/tools/bisect-script.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -euo pipefail
+
+# When we bisect, we need to ensure that the venvs are refreshed b/c the commit could
+# have changed the uv.lock or 3rdparty submodules, so we need to force a rebuild to be safe
+export NRL_FORCE_REBUILD_VENVS=true
+print_usage() {
+  cat <<EOF
+Usage: GOOD=<good_ref> BAD=<bad_ref> tools/bisect-script.sh [command ...]
+
+Runs a git bisect session between GOOD and BAD to find the first bad commit.
+Sets NRL_FORCE_REBUILD_VENVS=true to ensure test environments are rebuilt to match commit's uv.lock.
+
+Examples:
+  GOOD=56a6225 BAD=32faafa tools/bisect-script.sh uv run --group dev pre-commit run --all-files
+  GOOD=464ed38 BAD=c843f1b tools/bisect-script.sh uv run --group test pytest tests/unit/test_foobar.py
+
+  # Example output:
+  # 1. Will run until hits the first bad commit.
+  # 2. Will show the bisect log (what was run) and visualize the bisect.
+  # 3. Reset git bisect state to return you to the git state you were originally. 
+ # + # 25e05a3d557dfe59a14df43048e16b6eea04436e is the first bad commit + # commit 25e05a3d557dfe59a14df43048e16b6eea04436e + # Author: Terry Kong <terryk@nvidia.com> + # Date: Fri Sep 26 17:24:45 2025 +0000 + # + # 3==4 + # + # Signed-off-by: Terry Kong <terryk@nvidia.com> + # + # tests/unit/test_foobar.py | 2 +- + # 1 file changed, 1 insertion(+), 1 deletion(-) + # bisect found first bad commit + # + RUN_STATUS=0 + # + set +x + # [bisect] --- bisect log --- + # # bad: [c843f1b994cb7e331aa8bc41c3206a6e76e453ef] try echo + # # good: [464ed38e68dcd23f0c1951784561dc8c78410ffe] add passing foobar + # git bisect start 'c843f1b' '464ed38' + # # good: [8b8b3961e9cdbc1b4a9b6a912f7d36d117952f62] try visualize + # git bisect good 8b8b3961e9cdbc1b4a9b6a912f7d36d117952f62 + # # bad: [25e05a3d557dfe59a14df43048e16b6eea04436e] 3==4 + # git bisect bad 25e05a3d557dfe59a14df43048e16b6eea04436e + # # good: [c82e0b69d52b8e1641226c022cb487afebe8ba99] 2==2 + # git bisect good c82e0b69d52b8e1641226c022cb487afebe8ba99 + # # first bad commit: [25e05a3d557dfe59a14df43048e16b6eea04436e] 3==4 + # [bisect] --- bisect visualize (oneline) --- + # 25e05a3d (HEAD) 3==4 + +Exit codes inside the command determine good/bad: + 0 -> good commit + non-zero -> bad commit + 125 -> skip this commit (per git-bisect convention) + +Environment variables: + GOOD Commit-ish known to be good (required) + BAD Commit-ish suspected bad (required) + (The script will automatically restore the repo state with 'git bisect reset' on exit.) + +Notes: + - The working tree will be reset by git bisect. Ensure you have no uncommitted changes. + - If GOOD is an ancestor of BAD with 0 or 1 commits in between, git can + conclude immediately; the script will show the result and exit without + running your command. 
+EOF +} + +# Minimal color helpers: blue for info, red for errors (TTY-only; NO_COLOR disables) +BLUE=""; RED=""; NC="" +if [[ -z "${NO_COLOR:-}" ]] && { [[ -t 1 ]] || [[ -t 2 ]]; }; then + BLUE=$'\033[34m' + RED=$'\033[31m' + NC=$'\033[0m' +fi + +iecho() { printf "%b%s%b\n" "$BLUE" "$*" "$NC"; } +fecho() { printf "%b%s%b\n" "$RED" "$*" "$NC" >&2; } + +if [[ "${1:-}" == "-h" || "${1:-}" == "--help" ]]; then + print_usage + exit 0 +fi + +if [[ -z "${GOOD:-}" || -z "${BAD:-}" ]]; then + fecho "ERROR: GOOD and BAD environment variables are required." + echo >&2 + print_usage >&2 + exit 2 +fi + +if ! git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + fecho "ERROR: Not inside a git repository." + exit 2 +fi + +# Ensure there is a command to run +if [[ $# -lt 1 ]]; then + fecho "ERROR: Missing command to evaluate during bisect." + echo >&2 + print_usage >&2 + exit 2 +fi + +USER_CMD=("$@") + +# Require a clean working tree +git update-index -q --refresh || true +if ! git diff --quiet; then + fecho "ERROR: Unstaged changes present. Commit or stash before bisect." + exit 2 +fi +if ! git diff --cached --quiet; then + fecho "ERROR: Staged changes present. Commit or stash before bisect." + exit 2 +fi + +# On interruption or script error, print helpful message +on_interrupt_or_error() { + local status=$? + if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + if git bisect log >/dev/null 2>&1; then + iecho "[bisect] Script interrupted or failed (exit ${status})." + iecho "[bisect] Restoring original state with 'git bisect reset' on exit." 
+ fi + fi +} +trap on_interrupt_or_error INT TERM ERR + +# Always reset bisect on exit to restore original state +cleanup_reset() { + if [[ -n "${BISECT_NO_RESET:-}" ]]; then + # Respect user's request to not reset the bisect + return + fi + if git rev-parse --is-inside-work-tree >/dev/null 2>&1; then + if git bisect log >/dev/null 2>&1; then + git bisect reset >/dev/null 2>&1 || true + fi + fi +} +trap cleanup_reset EXIT + +# Check if we are already in a bisect session +if git bisect log >/dev/null 2>&1; then + fecho "[bisect] We are already in a bisect session. Please reset the bisect manually if you want to start a new one." + exit 1 +fi + +set -x +git bisect start "$BAD" "$GOOD" +set +x + +# Detect immediate conclusion (no midpoints to test) +if git bisect log >/dev/null 2>&1; then + if git bisect log | grep -q "first bad commit:"; then + iecho "[bisect] Immediate conclusion from endpoints; no midpoints to test." + iecho "[bisect] --- bisect log ---" + git bisect log | cat + exit 0 + fi +fi + +set -x +set +e # Temporarily allow the command to fail to capture the exit status +git bisect run "${USER_CMD[@]}" +RUN_STATUS=$? +set -e +set +x + +# Show bisect details before cleanup +if git bisect log >/dev/null 2>&1; then + iecho "[bisect] --- bisect log ---" + git bisect log | cat +fi + +exit $RUN_STATUS + + diff --git a/tools/build-custom-vllm.sh b/tools/build-custom-vllm.sh index 0ae3ec0c58..399f361643 100644 --- a/tools/build-custom-vllm.sh +++ b/tools/build-custom-vllm.sh @@ -13,20 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-set -eoux pipefail +set -eou pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(realpath "$SCRIPT_DIR/..")" -# Default values -DEFAULT_GIT_URL="https://github.com/terrykong/vllm.git" -DEFAULT_BRANCH="terryk/demo-custom-vllm" -DEFAULT_VLLM_COMMIT=a3319f4f04fbea7defe883e516df727711e516cd # use full commit hash from the main branch # Parse command line arguments -GIT_URL=${1:-$DEFAULT_GIT_URL} -BRANCH=${2:-$DEFAULT_BRANCH} -export VLLM_COMMIT=${3:-$DEFAULT_VLLM_COMMIT} -export VLLM_PRECOMPILED_WHEEL_LOCATION="https://wheels.vllm.ai/${DEFAULT_VLLM_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" +GIT_URL=${1:-https://github.com/vllm-project/vllm.git} +GIT_REF=${2:-cc99baf14dacc2497d0c5ed84e076ef2c37f6a4d} +# NOTE: VLLM_USE_PRECOMPILED=1 didn't always seem to work since the wheels were sometimes built against an incompatible torch/cuda combo. +# This commit was chosen as one close to the v0.10 release: git merge-base --fork-point origin/main tags/v0.10.0 +VLLM_WHEEL_COMMIT=${3:-862f2ef893d9751db0a92bd2d4ae0e3d9677872f} # use full commit hash from the main branch +export VLLM_PRECOMPILED_WHEEL_LOCATION="https://wheels.vllm.ai/${VLLM_WHEEL_COMMIT}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl" BUILD_DIR=$(realpath "$SCRIPT_DIR/../3rdparty/vllm") if [[ -e "$BUILD_DIR" ]]; then @@ -36,24 +35,30 @@ fi echo "Building vLLM from:" echo " Vllm Git URL: $GIT_URL" -echo " Vllm Branch: $BRANCH" -echo " VLLM Wheel Commit: $VLLM_COMMIT" -echo " VLLM Precompiled Wheel Location: $VLLM_PRECOMPILED_WHEEL_LOCATION" +echo " Vllm Git ref: $GIT_REF" +echo " Vllm Wheel commit: $VLLM_WHEEL_COMMIT" +echo " Vllm Wheel location: $VLLM_PRECOMPILED_WHEEL_LOCATION" # Clone the repository echo "Cloning repository..." git clone "$GIT_URL" "$BUILD_DIR" cd "$BUILD_DIR" -git checkout "$BRANCH" +git checkout "$GIT_REF" # Create a new Python environment using uv echo "Creating Python environment..." 
+# Pop the project environment set by user to not interfere with the one we create for the vllm repo +OLD_UV_PROJECT_ENVIRONMENT=$UV_PROJECT_ENVIRONMENT +unset UV_PROJECT_ENVIRONMENT uv venv # Remove all comments from requirements files to prevent use_existing_torch.py from incorrectly removing xformers echo "Removing comments from requirements files..." find requirements/ -name "*.txt" -type f -exec sed -i 's/#.*$//' {} \; 2>/dev/null || true find requirements/ -name "*.txt" -type f -exec sed -i '/^[[:space:]]*$/d' {} \; 2>/dev/null || true +# Replace xformers==.* (but preserve any platform markers at the end) +# NOTE: that xformers is bumped from 0.0.30 to 0.0.31 to work with torch==2.7.1. This version may need to change to change when we upgrade torch. +find requirements/ -name "*.txt" -type f -exec sed -i -E 's/^(xformers)==[^;[:space:]]*/\1==0.0.32.post1/' {} \; 2>/dev/null || true uv run --no-project use_existing_torch.py @@ -61,7 +66,7 @@ uv run --no-project use_existing_torch.py echo "Installing dependencies..." uv pip install --upgrade pip uv pip install numpy setuptools setuptools_scm -uv pip install torch==2.7.0 --torch-backend=cu128 +uv pip install torch==2.8.0 --torch-backend=cu128 # Install vLLM using precompiled wheel echo "Installing vLLM with precompiled wheel..." @@ -69,5 +74,96 @@ uv pip install --no-build-isolation -e . echo "Build completed successfully!" echo "The built vLLM is available in: $BUILD_DIR" -echo "You can now update your pyproject.toml to use this local version." -echo "Follow instructions on https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md for how to configure your local NeMo RL environment to use this custom vLLM." + +echo "Updating repo pyproject.toml to point vLLM to local clone..." + +PYPROJECT_TOML="$REPO_ROOT/pyproject.toml" +if [[ ! -f "$PYPROJECT_TOML" ]]; then + echo "[ERROR] pyproject.toml not found at $PYPROJECT_TOML. This script must be run from the repo root and pyproject.toml must exist." 
+ exit 1 +fi + +cd "$REPO_ROOT" + +export UV_PROJECT_ENVIRONMENT=$OLD_UV_PROJECT_ENVIRONMENT +if [[ -n "$UV_PROJECT_ENVIRONMENT" ]]; then + # We optionally set this if the project environment is outside of the project directory. + # If we do not set this then uv pip install commands will fail + export VIRTUAL_ENV=$UV_PROJECT_ENVIRONMENT +fi +# Use tomlkit via uv to idempotently update pyproject.toml +uv run --no-project --with tomlkit python - <<'PY' +from pathlib import Path +from tomlkit import parse, dumps, inline_table + +pyproject_path = Path("pyproject.toml") +text = pyproject_path.read_text() +doc = parse(text) + +# 1) Ensure setuptools_scm in [project].dependencies +project = doc.get("project") +if project is None: + raise SystemExit("[ERROR] Missing [project] in pyproject.toml") + +deps = project.get("dependencies") + +if not any(x.startswith("setuptools_scm") for x in deps): + deps.append("setuptools_scm") + +# 2) Update [project.optional-dependencies].vllm: unpin vllm==... -> vllm +opt = project.get("optional-dependencies") +vllm_list = opt["vllm"] +# Remove any pinned vllm==... 
+keep_items = [] +has_unpinned_vllm = False +for item in vllm_list: + s = str(item).strip() + if s.startswith("vllm=="): + continue + if s == "vllm": + has_unpinned_vllm = True + keep_items.append(item) +if not has_unpinned_vllm: + keep_items.append("vllm") +vllm_list.clear() +for it in keep_items: + vllm_list.append(it) + +# 3) Add [tool.uv.sources].vllm = { path = "3rdparty/vllm", editable = true } +tool = doc.setdefault("tool", {}) +uv = tool.setdefault("uv", {}) +sources = uv.setdefault("sources", {}) +desired = inline_table() +desired.update({"path": "3rdparty/vllm", "editable": True}) +sources["vllm"] = desired + +# 4) Ensure [tool.uv].no-build-isolation-package includes "vllm" +nbip = uv.setdefault("no-build-isolation-package", []) +nbip_strs = [str(x) for x in nbip] +if "vllm" not in nbip_strs: + nbip.append("vllm") + +pyproject_path.write_text(dumps(doc)) +print("[INFO] Updated pyproject.toml for local vLLM.") +PY + +# Ensure build deps and re-lock +uv pip install setuptools_scm +uv lock + +# Write to a file that a docker build will use to set the necessary env vars +cat <<EOF >$BUILD_DIR/nemo-rl.env +export VLLM_GIT_REF=$GIT_REF +export VLLM_PRECOMPILED_WHEEL_LOCATION=$VLLM_PRECOMPILED_WHEEL_LOCATION +EOF + +cat <<EOF +[INFO] pyproject.toml updated. NeMo RL is now configured to use the local vLLM at 3rdparty/vllm. +[INFO] Verify this new vllm version by running: + +VLLM_PRECOMPILED_WHEEL_LOCATION=$VLLM_PRECOMPILED_WHEEL_LOCATION \\ + uv run --extra vllm vllm serve Qwen/Qwen3-0.6B + +[INFO] For more information on this custom install, visit https://github.com/NVIDIA-NeMo/RL/blob/main/docs/guides/use-custom-vllm.md +[IMPORTANT] Remember to set the shell variable 'VLLM_PRECOMPILED_WHEEL_LOCATION' when running NeMo RL apps with this custom vLLM to avoid re-compiling. +EOF diff --git a/tools/code_snapshot.sh b/tools/code_snapshot.sh index 62136a8632..95474a4a6c 100644 --- a/tools/code_snapshot.sh +++ b/tools/code_snapshot.sh @@ -16,12 +16,14 @@ if [[ ! 
-e "$PROJECT_ROOT/.git" ]]; then elif [[ $# -lt 1 ]]; then echo2 "[Error]: This script requires one argument: the name of the experiment to be used as the snapshot directory name" echo2 "Usage: bash tools/code_snapshot.sh <experiment_name>" + echo2 "Usage: CODE_SNAPSHOT_DIRNAME=code_snapshots_dbg bash tools/code_snapshot.sh <experiment_name>" exit 1 fi EXP_NAME=$1 +CODE_SNAPSHOT_DIRNAME=${CODE_SNAPSHOT_DIRNAME:-code_snapshots} -SNAPSHOT_DIR="$PROJECT_ROOT/code_snapshots/${EXP_NAME}" +SNAPSHOT_DIR="$PROJECT_ROOT/${CODE_SNAPSHOT_DIRNAME}/${EXP_NAME}" if [[ ! -d "$SNAPSHOT_DIR" ]]; then echo2 "Creating new code snapshot in $SNAPSHOT_DIR" mkdir -p $SNAPSHOT_DIR @@ -32,9 +34,11 @@ else exit fi -echo2 "Copying git-tracked files..." -rsync -a --files-from=<(git ls-files) ./ $SNAPSHOT_DIR/ +echo2 "Copying git-tracked files and submodules..." +rsync -a --files-from=<( + git ls-files --recurse-submodules --cached --full-name +) ./ $SNAPSHOT_DIR/ # Echo the snapshot directory so the caller can use it to `cd` into it -echo ${SNAPSHOT_DIR} \ No newline at end of file +echo ${SNAPSHOT_DIR} diff --git a/tools/config_cli.py b/tools/config_cli.py new file mode 100755 index 0000000000..04780e7747 --- /dev/null +++ b/tools/config_cli.py @@ -0,0 +1,527 @@ +#!/usr/bin/env -S uv run --script -q +# /// script +# dependencies = [ +# "omegaconf" +# ] +# /// +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Utilities for working with YAML configs in this repo.
+
+Subcommands:
+  - expand: Resolve a config with OmegaConf interpolation and inheritance.
+  - minimize: Given a base config and a config, remove keys in the config that
+    are equal to the base, and ensure a defaults entry pointing to the base
+    exists. The defaults path in the resulting config is written relative to
+    the base config file.
+  - minimize-check: Same args as `minimize` but only checks if minimization
+    would change the file; exits non-zero if changes are needed.
+
+The `expand` and `minimize` commands support printing to stdout or in-place editing of the config file.
+
+Example:
+  # Expand a config with a root level "defaults" key to see the full config; print to stdout
+  tools/config_cli.py expand examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml
+
+  # Expand a config with a root level "defaults" key to see the full config; edit the config in place
+  tools/config_cli.py expand examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --in-place
+
+  # Minimize a config and remove all keys that are present in the base config; print to stdout
+  # tools/config_cli.py minimize <base_config> <config>
+  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml
+
+  # Minimize a config and remove all keys that are present in the base config; edit the config in place
+  # tools/config_cli.py minimize <base_config> <config>
+  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml --in-place
+
+  # Minimize all the llm configs:
+  for algo in grpo dpo sft distillation; do
+    base_config=examples/configs/${algo}.yaml
+    if [[ ${algo} == grpo ]]; then
+      base_config=examples/configs/grpo_math_1B.yaml
+    elif [[ ${algo} == distillation ]]; then
+      base_config=examples/configs/distillation_math.yaml
fi
+    for recipe in examples/configs/recipes/llm/${algo}-*.yaml; do
+      tools/config_cli.py minimize $base_config $recipe --in-place
+    done
+  done
+
+  # Minimize vlm configs:
+  for recipe in examples/configs/recipes/vlm/vlm_grpo-*.yaml; do
+    tools/config_cli.py minimize examples/configs/vlm_grpo_3B.yaml $recipe --in-place
+  done
+
+  # Compare two configs
+  tools/config_cli.py compare examples/configs/grpo_math_1B.yaml examples/configs/grpo_math_8B.yaml
+
+  # Minimize a config and compare it to not minimizing (should be the same)
+  tools/config_cli.py minimize examples/configs/dpo.yaml examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml >examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml.minimized
+  tools/config_cli.py compare \
+    examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml \
+    examples/configs/recipes/llm/dpo-llama3.1-8b-instruct-4n8g-fsdp2tp2-quick.v2.yaml.minimized
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+# ============================================================================
+# VENDORED SECTION: Minimal self-contained config loader (no nemo_rl dependency)
+#
+# Original source: `nemo_rl/utils/config.py`
+# - Functions adapted: `resolve_path`, `load_config_with_inheritance`, `load_config`
+# - Purpose: avoid importing from nemo_rl so this script is standalone
+# - If upstream changes, consider updating this vendored block accordingly
+# ============================================================================
+from typing import Any, Iterable, Optional, Union, cast
+
+from omegaconf import DictConfig, ListConfig, OmegaConf
+
+
+def resolve_path(base_path: Path, path: str) -> Path:
+    """Resolve a path relative to the base path."""
+    if path.startswith("/"):
+        return Path(path)
+    return base_path / path
+
+
+def load_config_with_inheritance(
+    config_path: Union[str, Path],
+    base_dir: Optional[Union[str, Path]] = None,
+) -> DictConfig:
+ """Load a config file with inheritance support. + + Args: + config_path: Path to the config file + base_dir: Base directory for resolving relative paths. If None, uses config_path's directory + + Returns: + Merged config dictionary + """ + config_path = Path(config_path) + if base_dir is None: + base_dir = config_path.parent + base_dir = Path(base_dir) + + config = OmegaConf.load(config_path) + assert isinstance(config, DictConfig), ( + "Config must be a Dictionary Config (List Config not supported)" + ) + + # Handle inheritance + if "defaults" in config: + defaults = config.pop("defaults") + if isinstance(defaults, (str, Path)): + defaults = [defaults] + elif isinstance(defaults, ListConfig): + defaults = [str(d) for d in defaults] + + # Load and merge all parent configs + base_config = OmegaConf.create({}) + for default in defaults: + parent_path = resolve_path(base_dir, str(default)) + parent_config = load_config_with_inheritance(parent_path, base_dir) + base_config = cast(DictConfig, OmegaConf.merge(base_config, parent_config)) + + # Merge with current config + config = cast(DictConfig, OmegaConf.merge(base_config, config)) + + return config + + +def load_config(config_path: Union[str, Path]) -> DictConfig: + """Load a config file with inheritance support and convert it to an OmegaConf object. + + The config inheritance system supports: + + 1. Single inheritance: + ```yaml + # child.yaml + defaults: parent.yaml + common: + value: 43 + ``` + + 2. Multiple inheritance: + ```yaml + # child.yaml + defaults: + - parent1.yaml + - parent2.yaml + common: + value: 44 + ``` + + 3. Nested inheritance: + ```yaml + # parent.yaml + defaults: grandparent.yaml + common: + value: 43 + + # child.yaml + defaults: parent.yaml + common: + value: 44 + ``` + + 4. 
Variable interpolation: + ```yaml + # parent.yaml + base_value: 42 + derived: + value: ${base_value} + + # child.yaml + defaults: parent.yaml + base_value: 43 # This will update both base_value and derived.value + ``` + + The system handles: + - Relative and absolute paths + - Multiple inheritance + - Nested inheritance + - Variable interpolation + + The inheritance is resolved depth-first, with later configs overriding earlier ones. + This means in multiple inheritance, the last config in the list takes precedence. + + Args: + config_path: Path to the config file + + Returns: + Merged config dictionary + """ + return load_config_with_inheritance(config_path) + + +# ============================================================================ +# END VENDORED SECTION +# ============================================================================ + + +def _dict_like(obj: Any) -> bool: + return isinstance(obj, dict) + + +def _list_like(obj: Any) -> bool: + return isinstance(obj, list) + + +REMOVE = object() + + +def _prune_equal(a: Any, b: Any) -> Any: + """Return a copy of `a` with entries equal to `b` removed. + + - If both are dicts: recursively prune and drop keys whose subtree is empty + after pruning or equal. + - If both are lists of same length: recursively prune by index and drop list + if becomes entirely empty or equal. + - Else: if equal, return a sentinel indicating removal; otherwise return `a`. 
+ """ + if _dict_like(a) and _dict_like(b): + out: dict[str, Any] = {} + a_dict: dict[str, Any] = a # type: ignore[assignment] + b_dict: dict[str, Any] = b # type: ignore[assignment] + for key, a_val in a_dict.items(): + if key in b_dict: + pruned = _prune_equal(a_val, b_dict[key]) + if pruned is REMOVE: + # equal, skip + continue + # keep if subtree has content + if pruned != {} and pruned != []: + out[key] = pruned + else: + out[key] = a_val + return out + + if _list_like(a) and _list_like(b) and len(a) == len(b): + # Only remove if entire list equals base; avoid partial list pruning + # to prevent semantic changes in ordered config sections. + if a == b: + return REMOVE + return a + + # Base types + if a == b: + return REMOVE + return a + + +def _ensure_defaults_relative( + child_path: Path, base_path: Path, child_cfg: dict[str, Any] +) -> None: + """Ensure `defaults:` points to the base, with a path relative to the base config file. + + The path we store must be a string such that, when the resulting minimized + config sits at `child_path`, the `defaults` string references the base + config location. The instruction asks that the defaults path in the resulting + config is relative to the base config; we interpret this as "express `base` + relative to the directory of the base file", then make that path relative + to the child config so that hydra resolution works from the child file. 
+ """ + # Compute a relative reference from child dir to base file + import os + + rel_from_child_to_base = os.path.relpath( + str(base_path), start=str(child_path.parent) + ) + + existing = child_cfg.get("defaults") + if existing is None: + child_cfg["defaults"] = str(rel_from_child_to_base) + return + # Normalize various forms: string, single list element, list + if isinstance(existing, str): + existing_list: list[Any] = [existing] + else: + existing_list = list(existing) if isinstance(existing, Iterable) else [existing] + # Put our base at the first position if not present + if str(rel_from_child_to_base) not in [str(x) for x in existing_list]: + existing_list.insert(0, str(rel_from_child_to_base)) + # If it's a single element list, collapse to string for this repo's style + if len(existing_list) == 1: + child_cfg["defaults"] = existing_list[0] + else: + child_cfg["defaults"] = existing_list + + +def expand(args: argparse.Namespace) -> int: + # Merge defaults/inheritance using repo loader; preserve ${...} + cfg = load_config(str(Path(args.config).resolve())) + # Preserve ${...} by not resolving + text = OmegaConf.to_yaml(cfg) + if args.in_place: + Path(args.config).write_text(text) + else: + print(text + ("\n" if not text.endswith("\n") else ""), end="") + return 0 + + +def minimize(args: argparse.Namespace) -> int: + child_path = Path(args.config).resolve() + base_path = Path(args.base).resolve() + + child_cfg_raw = OmegaConf.load(child_path) + if not isinstance(child_cfg_raw, DictConfig): + raise TypeError( + f"Config at {child_path} must be a mapping (DictConfig), got {type(child_cfg_raw)}" + ) + base_cfg_raw = OmegaConf.load(base_path) + if not isinstance(base_cfg_raw, DictConfig): + raise TypeError( + f"Config at {base_path} must be a mapping (DictConfig), got {type(base_cfg_raw)}" + ) + + # Resolve both before comparison + child_resolved = OmegaConf.to_container(child_cfg_raw) + base_resolved = OmegaConf.to_container(base_cfg_raw) + + if not 
isinstance(child_resolved, dict) or not isinstance(base_resolved, dict): + raise TypeError("Both child and base configs must be mappings after resolution") + + pruned = _prune_equal(child_resolved, base_resolved) + + # Ensure mapping output + if pruned is None or not isinstance(pruned, dict): + pruned = {} if pruned is None else {"value": pruned} + + # Ensure defaults reference base (relative path from child) + _ensure_defaults_relative(child_path, base_path, pruned) + + # Ensure `defaults` appears first in the top-level mapping + if "defaults" in pruned: + pruned = {"defaults": pruned["defaults"], **pruned} + + # Emit + text = OmegaConf.to_yaml(OmegaConf.create(pruned)) + if args.in_place: + Path(args.config).write_text(text) + else: + print(text + ("\n" if not text.endswith("\n") else ""), end="") + return 0 + + +def _flatten(d: Any, prefix: str = "") -> dict[str, Any]: + out: dict[str, Any] = {} + if isinstance(d, dict): + for k, v in d.items(): + key = f"{prefix}.{k}" if prefix else str(k) + out.update(_flatten(v, key)) + elif isinstance(d, list): + for i, v in enumerate(d): + key = f"{prefix}[{i}]" + out.update(_flatten(v, key)) + else: + out[prefix] = d + return out + + +def compare(args: argparse.Namespace) -> int: + left_path = Path(args.left).resolve() + right_path = Path(args.right).resolve() + + # Expand via repo loader, then convert to plain dict/list so _flatten works + left = OmegaConf.to_container(load_config(str(left_path))) # type: ignore[assignment] + right = OmegaConf.to_container(load_config(str(right_path))) # type: ignore[assignment] + + lf = _flatten(left) + rf = _flatten(right) + + left_keys = set(lf.keys()) + right_keys = set(rf.keys()) + + added = sorted(right_keys - left_keys) + removed = sorted(left_keys - right_keys) + common = sorted(left_keys & right_keys) + + changed: list[str] = [] + for k in common: + if lf[k] != rf[k]: + changed.append(k) + + if not added and not removed and not changed: + print("Configs are identical after 
expansion") + return 0 + + # Print concise report with explicit left/right context + print("Comparing configs after expansion:") + print(f" Left : {left_path}") + print(f" Right: {right_path}") + + if added: + print("\nAdded in Right (missing in Left):") + for k in added: + print(f" {k} = {rf[k]}") + + if removed: + print("\nRemoved in Right (only in Left):") + for k in removed: + print(f" {k} = {lf[k]}") + + if changed: + print("\nChanged (Left -> Right):") + for k in changed: + print(f" {k}: {lf[k]} -> {rf[k]}") + return 0 + + +def minimize_check(args: argparse.Namespace) -> int: + """Check if minimizing would change the file. Exit non-zero if so. + + Args (same as `minimize`): + base: Base config path + config: Child config path + """ + child_path = Path(args.config).resolve() + base_path = Path(args.base).resolve() + + # Compute minimized text (same as minimize()) + child_cfg_raw = OmegaConf.load(child_path) + base_cfg_raw = OmegaConf.load(base_path) + if not isinstance(child_cfg_raw, DictConfig) or not isinstance( + base_cfg_raw, DictConfig + ): + print( + f"[minimize-check] Both child and base must be mappings: {child_path} vs {base_path}", + file=sys.stderr, + ) + return 2 + + child_resolved = OmegaConf.to_container(child_cfg_raw) + base_resolved = OmegaConf.to_container(base_cfg_raw) + if not isinstance(child_resolved, dict) or not isinstance(base_resolved, dict): + print( + f"[minimize-check] Both child and base must resolve to mappings: {child_path} vs {base_path}", + file=sys.stderr, + ) + return 2 + + pruned = _prune_equal(child_resolved, base_resolved) + if pruned is None or not isinstance(pruned, dict): + pruned = {} if pruned is None else {"value": pruned} + _ensure_defaults_relative(child_path, base_path, pruned) + if "defaults" in pruned: + pruned = {"defaults": pruned["defaults"], **pruned} + minimized_text = OmegaConf.to_yaml(OmegaConf.create(pruned)) + + # Normalize current file via OmegaConf to reduce noise from formatting differences + try: + 
current_norm_text = OmegaConf.to_yaml(OmegaConf.load(child_path)) + except Exception: + current_norm_text = child_path.read_text() + + if current_norm_text != minimized_text: + print( + f"[minimize-check] {child_path} is not minimized.\n" + f" Suggested fix: tools/config_cli.py minimize {base_path} {child_path} --in-place", + file=sys.stderr, + ) + return 1 + + return 0 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Config tools (expand, minimize)") + sub = parser.add_subparsers(dest="cmd", required=True) + + p_expand = sub.add_parser("expand", help="Resolve a config with OmegaConf") + p_expand.add_argument("config", help="Path to config YAML") + p_expand.add_argument( + "--in-place", + action="store_true", + dest="in_place", + help="Edit file in place instead of printing", + ) + p_expand.set_defaults(func=expand) + + p_min = sub.add_parser( + "minimize", + help="Remove keys equal to base and ensure defaults reference base", + ) + p_min.add_argument("base", help="Base config path") + p_min.add_argument("config", help="Child config path") + p_min.add_argument( + "--in-place", + action="store_true", + dest="in_place", + help="Edit file in place instead of printing", + ) + p_min.set_defaults(func=minimize) + + p_cmp = sub.add_parser( + "compare", help="Compare two configs after expanding their defaults" + ) + p_cmp.add_argument("left", help="Left config path") + p_cmp.add_argument("right", help="Right config path") + p_cmp.set_defaults(func=compare) + + p_minchk = sub.add_parser( + "minimize-check", + help=( + "Exit non-zero if minimizing would change the file; args mirror `minimize`" + ), + ) + p_minchk.add_argument("base", help="Base config path") + p_minchk.add_argument("config", help="Child config path") + p_minchk.set_defaults(func=minimize_check) + + args = parser.parse_args() + ret = args.func(args) + if isinstance(ret, int): + sys.exit(ret) diff --git a/tools/launch b/tools/launch index 4c76cee78d..4c76651cea 100755 --- 
a/tools/launch +++ b/tools/launch @@ -150,6 +150,7 @@ for SCRIPT in $SCRIPTS; do SCRIPT_DIR=\$( cd -- "\$( dirname -- "\${BASH_SOURCE[0]}" )" &> /dev/null && pwd) cd \$SCRIPT_DIR +${EXTRA_ENV:-} \\ HF_HOME=$HF_HOME \\ HF_DATASETS_CACHE=$HF_DATASETS_CACHE \\ COMMAND="apt install -y jq && uv run $rel_script ${RELEASE_ARGS[@]}" \\ @@ -158,7 +159,7 @@ MOUNTS="$SNAPSHOT_DIR:$SNAPSHOT_DIR${MOUNTS}" \\ sbatch \\ --nodes=$NUM_NODES \\ --account=$ACCOUNT \\ - --job-name=$ACCOUNT:$JOB_NAME \\ + --job-name=$ACCOUNT:${JOB_NAME}${SLURM_JOB_SUFFIX:-} \\ --partition=$PARTITION \\ --time=0:${NUM_MINUTES}:0 \\ --gres=gpu:8 \\ diff --git a/tools/model_diagnostics/3.check_hf_model_embeddings_untrained.py b/tools/model_diagnostics/3.check_hf_model_embeddings_untrained.py new file mode 100755 index 0000000000..d3684df45c --- /dev/null +++ b/tools/model_diagnostics/3.check_hf_model_embeddings_untrained.py @@ -0,0 +1,288 @@ +#!/usr/bin/env python3 +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Model Diagnostic: Check HuggingFace Model Embeddings for Untrained Patterns. + +This script loads a HuggingFace model and analyzes the input and output embeddings +to detect patterns that suggest the model may be untrained or improperly initialized. 
+ +uv run --extra mcore 3.check_hf_model_embeddings_untrained.py --model nvidia/Nemotron-H-8B-Base-8K +""" + +import argparse + +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + + +def format_index_ranges(indices): + """Format a list of indices into range strings like '0-1,3-6'.""" + if not indices: + return "" + + ranges = [] + start = end = indices[0] + + for i in range(1, len(indices)): + if indices[i] == end + 1: + end = indices[i] + else: + ranges.append(str(start) if start == end else f"{start}-{end}") + start = end = indices[i] + + # Add the last range + ranges.append(str(start) if start == end else f"{start}-{end}") + return ",".join(ranges) + + +def get_token_info(tokenizer, idx): + """Get token information for a given index.""" + if not tokenizer: + return "N/A" + try: + return repr(tokenizer.decode([idx])) + except Exception: + return "N/A" + + +def print_problematic_embeddings( + weights, indices, problem_type, metric_values, threshold, tokenizer=None +): + """Print detailed information about each problematic embedding.""" + if not indices: + return + + print(f"\n--- Detailed {problem_type} Embeddings ---") + for idx in indices: + embedding = weights[idx] + metric_val = metric_values[idx].item() + token_info = get_token_info(tokenizer, idx) + + # Get first 2 and last 2 values + first_two = embedding[:2].tolist() + last_two = embedding[-2:].tolist() + + print( + f"Index {idx}: {problem_type} (metric: {metric_val:.2e} < {threshold:.2e})" + ) + print(f" Token: {token_info}") + print( + f" Values: [{first_two[0]:.2e}, {first_two[1]:.2e}, ..., {last_two[0]:.2e}, {last_two[1]:.2e}]" + ) + + +def find_output_embeddings(model): + """Find the output embeddings layer in various model architectures.""" + if hasattr(model, "get_output_embeddings"): + return model.get_output_embeddings() + elif hasattr(model, "lm_head"): + return model.lm_head + elif hasattr(model, "embed_out"): + return model.embed_out + return None + + +def 
check_embedding_layer( + embeddings, + layer_name, + near_zero_threshold, + identical_threshold, + tokenizer=None, + model=None, +): + """Check an embedding layer for untrained patterns.""" + print(f"\n=== {layer_name} Analysis ===") + + # Check if embeddings are tied (for output embeddings) + tied_info = "" + if layer_name == "Output Embeddings" and model and hasattr(model, "config"): + tied = getattr(model.config, "tie_word_embeddings", False) + tied_info = f" (Tied: {tied})" + print(f"Tied word embeddings: {tied}") + + # Get embedding weights + weights = ( + embeddings.weight.data if hasattr(embeddings, "weight") else embeddings.data + ) + + print(f"Shape: {weights.shape}") + print(f"Dtype: {weights.dtype}") + + # Check for near-zero embeddings + near_zero_mask = torch.abs(weights) < near_zero_threshold + near_zero_rows = near_zero_mask.all(dim=1) + near_zero_indices = torch.where(near_zero_rows)[0].tolist() + + # Check for identical embeddings using standard deviation + row_stds = weights.std(dim=1) + identical_mask = row_stds < identical_threshold + identical_indices = torch.where(identical_mask)[0].tolist() + + # Print detailed problematic embeddings + max_abs_values = torch.abs(weights).max(dim=1)[0] + print_problematic_embeddings( + weights, + near_zero_indices, + "Near-zero", + max_abs_values, + near_zero_threshold, + tokenizer, + ) + print_problematic_embeddings( + weights, + identical_indices, + "Identical", + row_stds, + identical_threshold, + tokenizer, + ) + + # Return summary data instead of printing + num_near_zero = len(near_zero_indices) + num_identical = len(identical_indices) + total_embeddings = weights.shape[0] + + # Flag potential issues + issues = [] + if num_near_zero > 0: + issues.append(f"{num_near_zero} near-zero embeddings") + if num_identical > 0: + issues.append(f"{num_identical} identical embeddings") + + return { + "layer_name": layer_name, + "tied_info": tied_info, + "shape": weights.shape, + "dtype": weights.dtype, + 
"num_near_zero": num_near_zero, + "num_identical": num_identical, + "total_embeddings": total_embeddings, + "near_zero_indices": near_zero_indices, + "identical_indices": identical_indices, + "near_zero_threshold": near_zero_threshold, + "identical_threshold": identical_threshold, + "mean_abs": torch.abs(weights).mean().item(), + "max_abs": torch.abs(weights).max().item(), + "min_std": row_stds.min().item(), + "max_std": row_stds.max().item(), + "issues": issues, + } + + +def main(): + parser = argparse.ArgumentParser( + description="Check HuggingFace model embeddings for untrained patterns" + ) + parser.add_argument( + "--model", + default="nvidia/Nemotron-H-8B-Base-8K", + help="HuggingFace model name or path", + ) + parser.add_argument( + "--near-zero-threshold", + type=float, + default=1e-10, + help="Threshold for detecting near-zero embeddings (default: 1e-10)", + ) + parser.add_argument( + "--identical-threshold", + type=float, + default=1e-8, + help="Threshold for detecting identical embeddings via std dev (default: 1e-8)", + ) + + args = parser.parse_args() + + print(f"Loading model: {args.model}") + + # Load model and tokenizer + model = AutoModelForCausalLM.from_pretrained( + args.model, torch_dtype="auto", trust_remote_code=True + ) + tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True) + + print("Model loaded successfully") + print(f"Model type: {type(model).__name__}") + print(f"Vocabulary size: {len(tokenizer)}") + + # Collect summary data from both embeddings + summaries = [] + + # Check input embeddings + input_embeddings = model.get_input_embeddings() + if input_embeddings is not None: + input_summary = check_embedding_layer( + input_embeddings, + "Input Embeddings", + args.near_zero_threshold, + args.identical_threshold, + tokenizer, + model, + ) + summaries.append(input_summary) + else: + print("\n⚠️ Could not find input embeddings layer") + + # Check output embeddings + output_embeddings = find_output_embeddings(model) + 
if output_embeddings is not None: + output_summary = check_embedding_layer( + output_embeddings, + "Output Embeddings", + args.near_zero_threshold, + args.identical_threshold, + tokenizer, + model, + ) + summaries.append(output_summary) + else: + print("\n⚠️ Could not find output embeddings layer") + + # Print summaries together + print("\n" + "=" * 80) + print("EMBEDDING SUMMARIES") + print("=" * 80) + + for summary in summaries: + print(f"\n--- {summary['layer_name']} Summary{summary['tied_info']} ---") + print(f"Shape: {summary['shape']}, Dtype: {summary['dtype']}") + + print( + f"Near-zero embeddings (abs < {summary['near_zero_threshold']:.2e}): {summary['num_near_zero']}/{summary['total_embeddings']} ({100 * summary['num_near_zero'] / summary['total_embeddings']:.1f}%)" + ) + if summary["near_zero_indices"]: + print(f" Indices: {format_index_ranges(summary['near_zero_indices'])}") + + print( + f"Identical embeddings (std < {summary['identical_threshold']:.2e}): {summary['num_identical']}/{summary['total_embeddings']} ({100 * summary['num_identical'] / summary['total_embeddings']:.1f}%)" + ) + if summary["identical_indices"]: + print(f" Indices: {format_index_ranges(summary['identical_indices'])}") + + print( + f"Statistics: mean_abs={summary['mean_abs']:.6f}, max_abs={summary['max_abs']:.6f}, std_range=[{summary['min_std']:.6f}, {summary['max_std']:.6f}]" + ) + + if summary["issues"]: + print(f"⚠️ POTENTIAL ISSUES: {', '.join(summary['issues'])}") + else: + print("✅ No obvious untrained patterns detected") + + print("\n=== Final Summary ===") + print(f"Model: {args.model}") + print("Analysis complete.") + + +if __name__ == "__main__": + main() diff --git a/tools/model_diagnostics/4.vllm_precision_compilation_test.py b/tools/model_diagnostics/4.vllm_precision_compilation_test.py new file mode 100644 index 0000000000..276f88943f --- /dev/null +++ b/tools/model_diagnostics/4.vllm_precision_compilation_test.py @@ -0,0 +1,242 @@ +# Copyright (c) 2025, NVIDIA 
CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import argparse +import os +from contextlib import contextmanager + +import numpy as np +import torch +from vllm import LLM, SamplingParams + + +@contextmanager +def environment(env_vars): + """Context manager to temporarily set environment variables. + + Args: + env_vars (dict): Dictionary of environment variable names and values to set + + Example: + with environment({"CUDA_VISIBLE_DEVICES": "0"}): + # Code here runs with CUDA_VISIBLE_DEVICES=0 + pass + # Environment variables are restored here + """ + # Store original values + original_values = {} + for key in env_vars: + if key in os.environ: + original_values[key] = os.environ[key] + else: + original_values[key] = None + + # Set new values + for key, value in env_vars.items(): + if value is None: + if key in os.environ: + del os.environ[key] + else: + os.environ[key] = str(value) + + try: + yield + finally: + # Restore original values + for key, value in original_values.items(): + if value is None: + if key in os.environ: + del os.environ[key] + else: + os.environ[key] = value + + +def extract_logprobs(logprobs): + output = [] + for lp in logprobs: + if lp is not None: + output.append(list(lp.values())[0].logprob) + return output + + +def pad_logprobs_list(logprobs_list): + """Pad a list of logprobs lists into a numpy array. 
+ + Args: + logprobs_list (list): List of lists, where each inner list contains logprobs + + Returns: + np.ndarray: Padded numpy array with shape (num_sequences, max_length) + """ + if not logprobs_list: + return np.array([]) + + max_length = max(len(lp) for lp in logprobs_list) + padded_array = np.full((len(logprobs_list), max_length), np.nan, dtype=np.float32) + + for i, lp in enumerate(logprobs_list): + padded_array[i, : len(lp)] = lp + + return padded_array + + +def assert_logprobs_close(actual, expected, test_name, atol=1e-3, rtol=1e-3): + """Assert that two logprobs arrays are close to each other. + + Args: + actual: The actual logprobs array + expected: The expected logprobs array + test_name (str): Name of the test for error messages + atol (float): Absolute tolerance + rtol (float): Relative tolerance + """ + try: + np.testing.assert_allclose(actual, expected, atol=atol, rtol=rtol) + print( + f"{test_name}: PASSED - Arrays are close within tolerance (atol={atol}, rtol={rtol})" + ) + except AssertionError as e: + print("=" * 100) + print(f"{test_name}: FAILED - Arrays are different") + print(f" Detailed error: {e}") + print("=" * 100) + + +def get_logprobs(llm, prompts, sampling_params): + outputs = llm.generate(prompts, sampling_params) + prompt_lps = [] + generation_lps = [] + + # Collect all logprobs + for output in outputs: + prompt_logprobs = extract_logprobs(output.prompt_logprobs) + generation_logprobs = extract_logprobs(output.outputs[0].logprobs) + prompt_lps.append(prompt_logprobs) + generation_lps.append(generation_logprobs) + + # Use common padding function + padded_prompt_lps = pad_logprobs_list(prompt_lps) + padded_generation_lps = pad_logprobs_list(generation_lps) + + return padded_prompt_lps, padded_generation_lps + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model", + type=str, + nargs="?", + default="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", + ) + args = parser.parse_args() + seed = 0 + + 
sampling_params = SamplingParams( + temperature=1.0, + top_p=1.0, + max_tokens=8192, + prompt_logprobs=0, + logprobs=0, + seed=seed, + ) + + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + "<|begin▁of▁sentence|><|User|>Think step-by-step to solve the following problem. Output your answer inside of \\\\boxed{} tags.:\n$A B C D$ is a rectangle with $A B=20$ and $B C=3$. A circle with radius 5, centered at the midpoint of $D C$, meets the rectangle at four points: $W, X, Y$, and $Z$. Find the area of quadrilateral $W X Y Z$.\n\nLet's think step-by-step<|Assistant|><think>\n", + ] + + common_llm_kwargs = { + "model": args.model, + "trust_remote_code": True, + "enable_prefix_caching": True, + "enable_chunked_prefill": True, + } + + eager_prompt_lps, eager_generation_lps = get_logprobs( + LLM(enforce_eager=True, **common_llm_kwargs), # eager mode for ground truth lps + prompts, + sampling_params, + ) + + torch.cuda.empty_cache() + + cuda_graph_prompt_lps, cuda_graph_generation_lps = get_logprobs( + LLM(enforce_eager=False, **common_llm_kwargs), # cuda graph mode + prompts, + sampling_params, + ) + + assert_logprobs_close( + cuda_graph_prompt_lps, + eager_prompt_lps, + "Eager and cuda graph mode lps (prompt lps)", + ) + assert_logprobs_close( + cuda_graph_generation_lps, + eager_generation_lps, + "Eager and cuda graph mode lps (generation lps)", + ) + + torch.cuda.empty_cache() + + with environment(env_vars={"TORCHINDUCTOR_EMULATE_PRECISION_CASTS": "1"}): + cuda_graph_prompt_lps_w_flag, cuda_graph_generation_lps_w_flag = get_logprobs( + LLM(enforce_eager=False, **common_llm_kwargs), + prompts, + sampling_params, + ) + + assert_logprobs_close( + cuda_graph_prompt_lps_w_flag, + eager_prompt_lps, + "Eager and cuda graph mode lps with torch inductor precision flag (prompt lps)", + ) + assert_logprobs_close( + cuda_graph_generation_lps_w_flag, + eager_generation_lps, + "Eager and cuda graph 
mode lps with torch inductor precision flag (generation lps)", + ) + + torch.cuda.empty_cache() + + ( + cuda_graph_prompt_lps_w_inductor_disabled, + cuda_graph_generation_lps_w_inductor_disabled, + ) = get_logprobs( + LLM( + enforce_eager=False, + compilation_config={"use_inductor": False}, + **common_llm_kwargs, + ), + prompts, + sampling_params, + ) + + assert_logprobs_close( + cuda_graph_prompt_lps_w_inductor_disabled, + eager_prompt_lps, + "Eager and cuda graph mode lps with use_inductor disabled (prompt lps)", + ) + assert_logprobs_close( + cuda_graph_generation_lps_w_inductor_disabled, + eager_generation_lps, + "Eager and cuda graph mode lps with use_inductor disabled (generation lps)", + ) + + +if __name__ == "__main__": + main() diff --git a/tools/refit_verifier.py b/tools/refit_verifier.py new file mode 100644 index 0000000000..5627fa9bf0 --- /dev/null +++ b/tools/refit_verifier.py @@ -0,0 +1,625 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Refitted Policy Comparison Script. + +This script compares logprobs between a Megatron policy and a vLLM policy +after performing model weight refitting. It demonstrates the workflow for +getting consistent logprobs across different inference backends. 
+ +Usage: + uv run --extra mcore python3 tools/refit_verifier.py --model_name /path/to/model + + +Example Output: + +--- Comparing Logprobs --- + +Input prompt: The following are multiple choice questions (with answers) about world religions. + +When was the first Buddhist temple constructed in Japan? +A. 325 CE +B. 119 CE +C. 451 CE +D. 596 CE +Answer: +Input tokens: tensor([200000, 954, 2182, 583, 6146, 9031, 5808, 330, 5992, + 8860, 21, 1509, 3817, 99867, 1574, 7022, 812, 290, + 1660, 120819, 55594, 24043, 310, 11197, 1044, 45, 26, + 220, 23325, 13607, 198, 46, 26, 220, 12860, 13607, + 198, 47, 26, 220, 34518, 13607, 198, 48, 26, + 220, 43145, 13607, 198, 4984, 38]) + +Comparing 10 generated tokens (from position 51 to 60): +vLLM generated logprobs: tensor([-7.0227, -7.1559, -6.4603, -6.7419, -6.3026, -6.8391, -6.3128, -6.6454, + -7.1514, -6.8304]) +Megatron generated logprobs: tensor([-7.0225, -7.1873, -6.4600, -6.7418, -6.3027, -6.8704, -6.2502, -6.6453, + -7.1518, -6.8304]) +Absolute difference: tensor([2.0981e-04, 3.1348e-02, 2.6035e-04, 1.6689e-04, 1.4973e-04, 3.1272e-02, + 6.2590e-02, 1.7643e-04, 3.2902e-04, 4.1485e-05]) +Mean absolute difference: 0.012654399499297142 +Max absolute difference: 0.06259012222290039 + +--- Token-by-Token Comparison (Generated Tokens Only) --- +Token Token ID Position vLLM Megatron Diff +--------------------------------------------------------------------------- +tok_51 pos_51 51 -7.022674 -7.022464 0.000210 +tok_52 pos_52 52 -7.155923 -7.187271 0.031348 +tok_53 pos_53 53 -6.460307 -6.460047 0.000260 +tok_54 pos_54 54 -6.741926 -6.741759 0.000167 +tok_55 pos_55 55 -6.302569 -6.302719 0.000150 +tok_56 pos_56 56 -6.839099 -6.870371 0.031272 +tok_57 pos_57 57 -6.312774 -6.250184 0.062590 +tok_58 pos_58 58 -6.645445 -6.645269 0.000176 +tok_59 pos_59 59 -7.151441 -7.151770 0.000329 +tok_60 pos_60 60 -6.830355 -6.830397 0.000041 +""" + +import argparse +import copy + +import ray +import torch +from transformers import AutoTokenizer 
+ +from nemo_rl.algorithms.grpo import refit_policy_generation +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.models.generation.vllm import VllmGeneration +from nemo_rl.models.policy.lm_policy import Policy + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Compare Megatron and vLLM policy logprobs after refitting" + ) + + parser.add_argument( + "--model_name", + type=str, + default="/root/checkpoints/llama4-scout-custom-init", + help="Path to the model checkpoint", + ) + parser.add_argument( + "--tp_size", + type=int, + default=1, + help="Tensor parallelism size (TP) for Megatron", + ) + parser.add_argument( + "--ep_size", + type=int, + default=1, + help="Expert parallelism size (EP) for Megatron", + ) + parser.add_argument( + "--pp_size", + type=int, + default=1, + help="Pipeline parallelism size (PP) for Megatron", + ) + parser.add_argument( + "--max_new_tokens", + type=int, + default=10, + help="Maximum number of new tokens to generate", + ) + parser.add_argument( + "--max_sequence_length", + type=int, + default=256, + help="Maximum total sequence length", + ) + parser.add_argument( + "--refit_buffer_size_gb", type=int, default=4, help="Refit buffer size in GB" + ) + parser.add_argument( + "--prompt", + type=str, + default="Here is a short introduction to me:", + help="Input prompt for generation", + ) + + return parser.parse_args() + + +def setup_configs(args, tokenizer): + """Setup configuration dictionaries for Megatron and vLLM. 
+ + Args: + args: Parsed command line arguments + tokenizer: HuggingFace tokenizer + + Returns: + tuple: (megatron_config, vllm_config) + """ + # Megatron Configuration + megatron_config = { + "model_name": args.model_name, + "training_backend": "megatron", + "train_global_batch_size": 1, + "train_micro_batch_size": 1, + "generation_batch_size": 2, + "learning_rate": 0.0001, + "logprob_batch_size": 1, + "generation": { + "max_total_sequence_length": args.max_sequence_length, + "max_new_tokens": args.max_sequence_length, + "do_sample": False, + "temperature": 1.0, + "pad_token_id": tokenizer.eos_token_id, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + }, + "precision": "bfloat16", + "offload_optimizer_for_logprob": False, + "pipeline_dtype": "bfloat16", + "parallel_output": True, + "max_total_sequence_length": args.max_sequence_length, + "fsdp_offload_enabled": False, + "max_grad_norm": 1.0, + "refit_buffer_size_gb": args.refit_buffer_size_gb, + "make_sequence_length_divisible_by": args.tp_size, + "optimizer": { + "type": "adam", + "kwargs": { + "lr": 0.0001, + "weight_decay": 0.0, + "eps": 1e-8, + }, + }, + "dtensor_cfg": { + "enabled": False, + }, + "dynamic_batching": { + "enabled": False, + "train_mb_tokens": 256, + "logprob_mb_tokens": 256, + "sequence_length_round": 64, + }, + "sequence_packing": { + "enabled": False, + }, + "megatron_cfg": { + "enabled": True, + "empty_unused_memory_level": 1, + "tensor_model_parallel_size": args.tp_size, + "sequence_parallel": False, + "expert_tensor_parallel_size": args.tp_size, + "expert_model_parallel_size": args.ep_size, + "pipeline_model_parallel_size": args.pp_size, + "context_parallel_size": 1, + "num_layers_in_first_pipeline_stage": None, + "num_layers_in_last_pipeline_stage": None, + "activation_checkpointing": False, + "moe_router_dtype": "fp64", + "moe_router_load_balancing_type": "none", + "moe_router_bias_update_rate": 0.0, + "moe_permute_fusion": 
False, + "pipeline_dtype": "bfloat16", + "train_iters": 1, + "bias_activation_fusion": False, + "freeze_moe_router": False, + "apply_rope_fusion": False, + "optimizer": { + "optimizer": "adam", + "lr": 5.0e-6, + "min_lr": 5.0e-7, + "weight_decay": 0.01, + "bf16": False, + "fp16": False, + "params_dtype": "float32", + # Adam optimizer settings + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_eps": 1e-8, + # SGD optimizer settings + "sgd_momentum": 0.9, + # Distributed optimizer settings + "use_distributed_optimizer": True, + "use_precision_aware_optimizer": True, + "clip_grad": 1.0, + # Optimizer CPU offload settings + "optimizer_cpu_offload": False, + "optimizer_offload_fraction": 0.0, + }, + "scheduler": { + "start_weight_decay": 0.01, + "end_weight_decay": 0.01, + "weight_decay_incr_style": "constant", + "lr_decay_style": "constant", + "lr_decay_iters": None, + "lr_warmup_iters": 50, + "lr_warmup_init": 5.0e-7, + }, + "distributed_data_parallel_config": { + "grad_reduce_in_fp32": False, + "overlap_grad_reduce": False, + "overlap_param_gather": False, + "use_custom_fsdp": False, + "data_parallel_sharding_strategy": "optim_grads_params", + }, + }, + } + + # vLLM Configuration (match new VllmGeneration expectations: TP/PP/EP provided separately) + vllm_config = { + "backend": "vllm", + "model_name": args.model_name, + "tokenizer": { + "name": args.model_name, + }, + "dtype": "bfloat16", + "max_new_tokens": args.max_new_tokens, + "temperature": 1.0, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "vllm_cfg": { + "tensor_parallel_size": args.tp_size, + "pipeline_parallel_size": args.pp_size, + "expert_parallel_size": args.ep_size, + "gpu_memory_utilization": 0.6, + "max_model_len": args.max_sequence_length, + "precision": "bfloat16", + "async_engine": False, + "skip_tokenizer_init": False, + "load_format": "dummy", + "enforce_eager": False, + }, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + 
"num_nodes": None, + }, + }, + "vllm_kwargs": {}, + } + + # Configure vLLM with tokenizer + vllm_config = configure_generation_config(vllm_config, tokenizer) + + return megatron_config, vllm_config + + +def setup_clusters_and_policies(args, megatron_config, vllm_config, tokenizer): + """Setup Ray clusters and initialize policies. + + Args: + args: Parsed command line arguments + megatron_config: Megatron configuration dictionary + vllm_config: vLLM configuration dictionary + tokenizer: HuggingFace tokenizer + + Returns: + tuple: (megatron_cluster, policy, vllm_inference_policy) + """ + gpus_per_node = args.tp_size * args.ep_size * args.pp_size + print(f"Setting up Megatron Cluster with TP={gpus_per_node}") + megatron_cluster = RayVirtualCluster( + name="megatron_cluster", + bundle_ct_per_node_list=[gpus_per_node], + use_gpus=True, + num_gpus_per_node=gpus_per_node, + max_colocated_worker_groups=2, + ) + + print("Instantiating Policy with Megatron backend...") + policy = Policy( + cluster=megatron_cluster, + config=megatron_config, + tokenizer=tokenizer, + init_reference_model=False, + init_optimizer=False, + ) + + # Create vLLM inference configuration with limited generation + vllm_inference_config = vllm_config.copy() + vllm_inference_config["max_new_tokens"] = args.max_new_tokens + vllm_inference_config = configure_generation_config( + vllm_inference_config, tokenizer + ) + + # Create vLLM policy for inference-only logprobs + vllm_inference_policy = VllmGeneration( + cluster=megatron_cluster, config=vllm_inference_config + ) + + return megatron_cluster, policy, vllm_inference_policy + + +def prepare_input_data(prompt, tokenizer): + """Tokenize the input prompt and prepare generation data. 
+ + Args: + prompt: Input text prompt + tokenizer: HuggingFace tokenizer + + Returns: + BatchedDataDict: Prepared input data + """ + print("Preparing input data...") + + # Tokenize the prompt + tokenized = tokenizer( + [prompt], + padding=True, + truncation=True, + return_tensors="pt", + padding_side="right", + ) + + # Calculate input lengths from attention mask + input_ids = tokenized["input_ids"] + attention_mask = tokenized["attention_mask"] + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + generation_data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + } + ) + + return generation_data + + +def run_model_refitting(policy, vllm_inference_policy, refit_buffer_size_gb): + """Perform model weight refitting between Megatron and vLLM policies. + + Args: + policy: Megatron policy + vllm_inference_policy: vLLM inference policy + refit_buffer_size_gb: Buffer size for refitting in GB + """ + print("\n--- Performing Model Refitting ---") + + # Perform the refitting between policies using GRPO's refit function + # Note: colocated_inference=True since we're using the same cluster + refit_policy_generation( + policy, + vllm_inference_policy, + colocated_inference=True, + _refit_buffer_size_gb=refit_buffer_size_gb, + ) + print("Model refitting completed") + + +def generate_and_compare_logprobs(policy, vllm_inference_policy, generation_data): + """Generate outputs and compare logprobs between vLLM and Megatron policies. 
+ + Args: + policy: Megatron policy + vllm_inference_policy: vLLM inference policy + generation_data: Input data for generation + + Returns: + tuple: (vllm_logprobs_data, megatron_generation_data) + """ + # Generate with vLLM for logprobs + print("\n--- Getting vLLM Policy Logprobs ---") + vllm_logprobs_data = vllm_inference_policy.generate(generation_data, greedy=True) + print(f"vLLM Logprobs shape: {vllm_logprobs_data['logprobs'].shape}") + print(f"vLLM Logprobs sample: {vllm_logprobs_data['logprobs'][0, -10:]}") + + # Generate with Megatron policy + print("\n--- Getting Megatron Generation ---") + policy.prepare_for_generation() + + # Prepare input data for Megatron using vLLM outputs + megatron_input_data = copy.deepcopy(generation_data) + print("=" * 100) + print(megatron_input_data) + print(vllm_logprobs_data) + megatron_input_data["input_ids"] = vllm_logprobs_data["output_ids"] + megatron_input_data["input_lengths"] = vllm_logprobs_data[ + "unpadded_sequence_lengths" + ] + + # Get logprobs from Megatron + policy.prepare_for_lp_inference() + megatron_generation_data = policy.get_logprobs(megatron_input_data) + print(f"Megatron Generation shape: {megatron_generation_data['logprobs'].shape}") + print( + f"Megatron Generation sample: {megatron_generation_data['logprobs'][0, -10:]}" + ) + + return vllm_logprobs_data, megatron_generation_data + + +def analyze_logprob_differences( + vllm_logprobs_data, megatron_generation_data, generation_data, tokenizer, prompt +): + """Analyze and display differences between vLLM and Megatron logprobs. 
+ + Args: + vllm_logprobs_data: vLLM generation results + megatron_generation_data: Megatron generation results + generation_data: Original input data + tokenizer: HuggingFace tokenizer + prompt: Original input prompt + """ + print("\n--- Comparing Logprobs ---") + print(f"Input prompt: {prompt}") + print( + f"Input tokens: {generation_data['input_ids'][0, : generation_data['input_lengths'][0]]}" + ) + + # Extract generation parameters + input_length = generation_data["input_lengths"][0].item() + total_length = vllm_logprobs_data["logprobs"].shape[1] + generated_length = vllm_logprobs_data["generation_lengths"][0].item() + + if generated_length > 0: + print( + f"\nComparing {generated_length} generated tokens (from position {input_length} to {total_length - 1}):" + ) + + # Extract generated logprobs + vllm_gen_logprobs = vllm_logprobs_data["logprobs"][0, input_length:total_length] + megatron_gen_logprobs = megatron_generation_data["logprobs"][ + 0, input_length:total_length + ] + + print(f"vLLM generated logprobs: {vllm_gen_logprobs}") + print(f"Megatron generated logprobs: {megatron_gen_logprobs}") + + # Calculate and display differences + abs_diff = torch.abs(vllm_gen_logprobs - megatron_gen_logprobs) + print(f"Absolute difference: {abs_diff}") + print(f"Mean absolute difference: {torch.mean(abs_diff)}") + print(f"Max absolute difference: {torch.max(abs_diff)}") + + # Detailed token-by-token comparison + _detailed_token_comparison( + vllm_gen_logprobs, + megatron_gen_logprobs, + vllm_logprobs_data, + input_length, + total_length, + tokenizer, + ) + else: + print( + f"No generated tokens to compare (input_length: {input_length}, total_length: {total_length})" + ) + + +def _detailed_token_comparison( + vllm_logprobs, + megatron_logprobs, + vllm_logprobs_data, + input_length, + total_length, + tokenizer, +): + """Display detailed token-by-token comparison of logprobs. 
+ + Args: + vllm_logprobs: vLLM logprobs for generated tokens + megatron_logprobs: Megatron logprobs for generated tokens + vllm_logprobs_data: Vllm generation data + input_length: Length of input sequence + total_length: Total sequence length + tokenizer: HuggingFace tokenizer + """ + print("\n--- Token-by-Token Comparison (Generated Tokens Only) ---") + + if total_length > input_length: + # Get generated tokens if available + if "output_ids" in vllm_logprobs_data: + generated_tokens = vllm_logprobs_data["output_ids"][ + 0, input_length:total_length + ] + else: + generated_tokens = torch.arange(input_length, total_length) + + # Display header + print( + f"{'Token':<15} {'Token ID':<10} {'Position':<10} {'vLLM':<12} {'Megatron':<12} {'Diff':<12}" + ) + print("-" * 75) + + # Display each token comparison + for i, pos in enumerate(range(input_length, total_length)): + if "output_ids" in vllm_logprobs_data: + token_id = generated_tokens[i].item() + token_text = tokenizer.decode([token_id]) + else: + token_id = f"pos_{pos}" + token_text = f"tok_{pos}" + + vllm_lp = vllm_logprobs[i].item() + megatron_lp = megatron_logprobs[i].item() + diff = abs(vllm_lp - megatron_lp) + + print( + f"{token_text:<15} {token_id:<10} {pos:<10} {vllm_lp:<12.6f} {megatron_lp:<12.6f} {diff:<12.6f}" + ) + else: + print("No generated tokens to compare in detail.") + + +def cleanup_resources(vllm_inference_policy): + """Clean up resources and shutdown policies. 
+ + Args: + vllm_inference_policy: vLLM policy to shutdown + """ + print("\n--- Cleaning up ---") + vllm_inference_policy.shutdown() + print("Cleanup completed successfully!") + + +def main(): + """Main execution function.""" + # Parse command line arguments + args = parse_args() + + # Initialize Ray + ray.init() + + # Setup tokenizer + tokenizer = AutoTokenizer.from_pretrained(args.model_name) + if tokenizer.pad_token_id is None: + tokenizer.pad_token_id = tokenizer.eos_token_id + + # Setup configurations + megatron_config, vllm_config = setup_configs(args, tokenizer) + + # Setup clusters and policies + megatron_cluster, policy, vllm_inference_policy = setup_clusters_and_policies( + args, megatron_config, vllm_config, tokenizer + ) + + # Prepare input data + generation_data = prepare_input_data(args.prompt, tokenizer) + + # prepare refit info + state_dict_info = policy.prepare_refit_info() + vllm_inference_policy.prepare_refit_info(state_dict_info) + + # Perform model refitting + run_model_refitting(policy, vllm_inference_policy, args.refit_buffer_size_gb) + + # Generate and compare logprobs + vllm_logprobs_data, megatron_generation_data = generate_and_compare_logprobs( + policy, vllm_inference_policy, generation_data + ) + + # Analyze differences + analyze_logprob_differences( + vllm_logprobs_data, + megatron_generation_data, + generation_data, + tokenizer, + args.prompt, + ) + + # Cleanup + cleanup_resources(vllm_inference_policy) + + print("Script completed successfully!") + + +if __name__ == "__main__": + main() diff --git a/uv.lock b/uv.lock index 321e66f287..7b06abd41f 100644 --- a/uv.lock +++ b/uv.lock @@ -1,41 +1,70 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.12" resolution-markers = [ - "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", - "python_full_version < '3.13' and platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux'", - "python_full_version < 
'3.13' and platform_machine == 'arm64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version < '3.13' and sys_platform == 'darwin'", - "python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", - "python_full_version < '3.13' and sys_platform != 'darwin' and sys_platform != 'linux'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", ] [manifest] members = [ + "megatron-bridge", "megatron-core", + "nemo-automodel", "nemo-rl", - "nemo-tron", + "penguin", ] +overrides = [ + { name = "opencv-python-headless", specifier = ">=4.11.0" }, + { name = "transformer-engine", extras = ["pytorch"], specifier = "==2.8.0" }, +] + +[[manifest.dependency-metadata]] +name = "causal-conv1d" +version = "1.5.0.post8" +requires-dist = ["torch", 
"packaging", "ninja"] + +[[manifest.dependency-metadata]] +name = "deep-ep" +version = "1.1.0+e3908bf" +requires-dist = ["torch", "packaging", "ninja"] + +[[manifest.dependency-metadata]] +name = "deep-gemm" +version = "2.0.0+7b6b556" +requires-dist = ["torch", "packaging", "ninja"] [[manifest.dependency-metadata]] name = "flash-attn" requires-dist = ["torch", "einops", "setuptools", "psutil", "ninja"] +[[manifest.dependency-metadata]] +name = "mamba-ssm" +version = "2.2.4" +requires-dist = ["torch", "packaging", "ninja", "causal-conv1d"] + [[package]] name = "absl-py" -version = "2.3.0" +version = "2.3.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/03/15/18693af986560a5c3cc0b84a8046b536ffb2cdb536e03cce897f2759e284/absl_py-2.3.0.tar.gz", hash = "sha256:d96fda5c884f1b22178852f30ffa85766d50b99e00775ea626c23304f582fc4f", size = 116400, upload-time = "2025-05-27T09:15:50.143Z" } +sdist = { url = "https://files.pythonhosted.org/packages/10/2a/c93173ffa1b39c1d0395b7e842bbdc62e556ca9d8d3b5572926f3e4ca752/absl_py-2.3.1.tar.gz", hash = "sha256:a97820526f7fbfd2ec1bce83f3f25e3a14840dac0d8e02a0b71cd75db3f77fc9", size = 116588, upload-time = "2025-07-03T09:31:44.05Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/87/04/9d75e1d3bb4ab8ec67ff10919476ccdee06c098bcfcf3a352da5f985171d/absl_py-2.3.0-py3-none-any.whl", hash = "sha256:9824a48b654a306168f63e0d97714665f8490b8d89ec7bf2efc24bf67cf579b3", size = 135657, upload-time = "2025-05-27T09:15:48.742Z" }, + { url = "https://files.pythonhosted.org/packages/8f/aa/ba0014cc4659328dc818a28827be78e6d97312ab0cb98105a770924dc11e/absl_py-2.3.1-py3-none-any.whl", hash = "sha256:eeecf07f0c2a93ace0772c92e596ace6d3d3996c042b2128459aaae2a76de11d", size = 135811, upload-time = "2025-07-03T09:31:42.253Z" }, ] [[package]] name = "accelerate" -version = "1.8.1" +version = "1.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = 
"huggingface-hub" }, @@ -44,11 +73,12 @@ dependencies = [ { name = "psutil" }, { name = "pyyaml" }, { name = "safetensors" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bd/c2/b9e33ad13232606dded4c546e654fb06a15f1dbcbd95d81c9f9dd3ccc771/accelerate-1.8.1.tar.gz", hash = "sha256:f60df931671bc4e75077b852990469d4991ce8bd3a58e72375c3c95132034db9", size = 380872, upload-time = "2025-06-20T15:36:14.618Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f7/66/be171836d86dc5b8698b3a9bf4b9eb10cb53369729939f88bf650167588b/accelerate-1.10.0.tar.gz", hash = "sha256:8270568fda9036b5cccdc09703fef47872abccd56eb5f6d53b54ea5fb7581496", size = 392261, upload-time = "2025-08-07T10:54:51.664Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/d9/e044c9d42d8ad9afa96533b46ecc9b7aea893d362b3c52bd78fb9fe4d7b3/accelerate-1.8.1-py3-none-any.whl", hash = "sha256:c47b8994498875a2b1286e945bd4d20e476956056c7941d512334f4eb44ff991", size = 365338, upload-time = "2025-06-20T15:36:12.71Z" }, + { url = "https://files.pythonhosted.org/packages/30/dd/0107f0aa179869ee9f47ef5a2686abd5e022fdc82af901d535e52fe91ce1/accelerate-1.10.0-py3-none-any.whl", hash = "sha256:260a72b560e100e839b517a331ec85ed495b3889d12886e79d1913071993c5a3", size = 374718, upload-time = "2025-08-07T10:54:49.988Z" }, ] [[package]] @@ -63,6 +93,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/3f/95338030883d8c8b91223b4e21744b04d11b161a3ef117295d8241f50ab4/accessible_pygments-0.0.5-py3-none-any.whl", hash = "sha256:88ae3211e68a1d0b011504b2ffc1691feafce124b845bd072ab6f9f66f34d4b7", size = 1395903, upload-time = "2024-05-10T11:23:08.421Z" }, ] +[[package]] +name = 
"accumulation-tree" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/dc/4ffda8a22b6af3f41bcec07ddfebe723218976eaa016cefbc904634a4e85/accumulation_tree-0.6.4.tar.gz", hash = "sha256:5f907667e4106b5ba140b6b871e1902eb2a93d429b92f8a9f7ddb2bee7704334", size = 12635, upload-time = "2024-09-26T21:50:40.627Z" } + +[[package]] +name = "aiofiles" +version = "24.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/03/a88171e277e8caa88a4c77808c20ebb04ba74cc4681bf1e9416c862de237/aiofiles-24.1.0.tar.gz", hash = "sha256:22a075c9e5a3810f0c2e48f3008c94d68c65d763b9b03857924c99e57355166c", size = 30247, upload-time = "2024-06-24T11:02:03.584Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a5/45/30bb92d442636f570cb5651bc661f52b610e2eec3f891a5dc3a4c3667db0/aiofiles-24.1.0-py3-none-any.whl", hash = "sha256:b4ec55f4195e3eb5d7abd1bf7e061763e864dd4954231fb8539a0ef8bb8260e5", size = 15896, upload-time = "2024-06-24T11:02:01.529Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -74,7 +119,7 @@ wheels = [ [[package]] name = "aiohttp" -version = "3.12.13" +version = "3.12.15" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, @@ -85,42 +130,42 @@ dependencies = [ { name = "propcache" }, { name = "yarl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/42/6e/ab88e7cb2a4058bed2f7870276454f85a7c56cd6da79349eb314fc7bbcaa/aiohttp-3.12.13.tar.gz", hash = "sha256:47e2da578528264a12e4e3dd8dd72a7289e5f812758fe086473fab037a10fcce", size = 7819160, upload-time = "2025-06-14T15:15:41.354Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/6a/ce40e329788013cd190b1d62bbabb2b6a9673ecb6d836298635b939562ef/aiohttp-3.12.13-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0aa580cf80558557285b49452151b9c69f2fa3ad94c5c9e76e684719a8791b73", size = 
700491, upload-time = "2025-06-14T15:14:00.048Z" }, - { url = "https://files.pythonhosted.org/packages/28/d9/7150d5cf9163e05081f1c5c64a0cdf3c32d2f56e2ac95db2a28fe90eca69/aiohttp-3.12.13-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b103a7e414b57e6939cc4dece8e282cfb22043efd0c7298044f6594cf83ab347", size = 475104, upload-time = "2025-06-14T15:14:01.691Z" }, - { url = "https://files.pythonhosted.org/packages/f8/91/d42ba4aed039ce6e449b3e2db694328756c152a79804e64e3da5bc19dffc/aiohttp-3.12.13-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78f64e748e9e741d2eccff9597d09fb3cd962210e5b5716047cbb646dc8fe06f", size = 467948, upload-time = "2025-06-14T15:14:03.561Z" }, - { url = "https://files.pythonhosted.org/packages/99/3b/06f0a632775946981d7c4e5a865cddb6e8dfdbaed2f56f9ade7bb4a1039b/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29c955989bf4c696d2ededc6b0ccb85a73623ae6e112439398935362bacfaaf6", size = 1714742, upload-time = "2025-06-14T15:14:05.558Z" }, - { url = "https://files.pythonhosted.org/packages/92/a6/2552eebad9ec5e3581a89256276009e6a974dc0793632796af144df8b740/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d640191016763fab76072c87d8854a19e8e65d7a6fcfcbf017926bdbbb30a7e5", size = 1697393, upload-time = "2025-06-14T15:14:07.194Z" }, - { url = "https://files.pythonhosted.org/packages/d8/9f/bd08fdde114b3fec7a021381b537b21920cdd2aa29ad48c5dffd8ee314f1/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4dc507481266b410dede95dd9f26c8d6f5a14315372cc48a6e43eac652237d9b", size = 1752486, upload-time = "2025-06-14T15:14:08.808Z" }, - { url = "https://files.pythonhosted.org/packages/f7/e1/affdea8723aec5bd0959171b5490dccd9a91fcc505c8c26c9f1dca73474d/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8a94daa873465d518db073bd95d75f14302e0208a08e8c942b2f3f1c07288a75", size = 
1798643, upload-time = "2025-06-14T15:14:10.767Z" }, - { url = "https://files.pythonhosted.org/packages/f3/9d/666d856cc3af3a62ae86393baa3074cc1d591a47d89dc3bf16f6eb2c8d32/aiohttp-3.12.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:177f52420cde4ce0bb9425a375d95577fe082cb5721ecb61da3049b55189e4e6", size = 1718082, upload-time = "2025-06-14T15:14:12.38Z" }, - { url = "https://files.pythonhosted.org/packages/f3/ce/3c185293843d17be063dada45efd2712bb6bf6370b37104b4eda908ffdbd/aiohttp-3.12.13-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f7df1f620ec40f1a7fbcb99ea17d7326ea6996715e78f71a1c9a021e31b96b8", size = 1633884, upload-time = "2025-06-14T15:14:14.415Z" }, - { url = "https://files.pythonhosted.org/packages/3a/5b/f3413f4b238113be35dfd6794e65029250d4b93caa0974ca572217745bdb/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3062d4ad53b36e17796dce1c0d6da0ad27a015c321e663657ba1cc7659cfc710", size = 1694943, upload-time = "2025-06-14T15:14:16.48Z" }, - { url = "https://files.pythonhosted.org/packages/82/c8/0e56e8bf12081faca85d14a6929ad5c1263c146149cd66caa7bc12255b6d/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:8605e22d2a86b8e51ffb5253d9045ea73683d92d47c0b1438e11a359bdb94462", size = 1716398, upload-time = "2025-06-14T15:14:18.589Z" }, - { url = "https://files.pythonhosted.org/packages/ea/f3/33192b4761f7f9b2f7f4281365d925d663629cfaea093a64b658b94fc8e1/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:54fbbe6beafc2820de71ece2198458a711e224e116efefa01b7969f3e2b3ddae", size = 1657051, upload-time = "2025-06-14T15:14:20.223Z" }, - { url = "https://files.pythonhosted.org/packages/5e/0b/26ddd91ca8f84c48452431cb4c5dd9523b13bc0c9766bda468e072ac9e29/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:050bd277dfc3768b606fd4eae79dd58ceda67d8b0b3c565656a89ae34525d15e", size = 1736611, upload-time = 
"2025-06-14T15:14:21.988Z" }, - { url = "https://files.pythonhosted.org/packages/c3/8d/e04569aae853302648e2c138a680a6a2f02e374c5b6711732b29f1e129cc/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2637a60910b58f50f22379b6797466c3aa6ae28a6ab6404e09175ce4955b4e6a", size = 1764586, upload-time = "2025-06-14T15:14:23.979Z" }, - { url = "https://files.pythonhosted.org/packages/ac/98/c193c1d1198571d988454e4ed75adc21c55af247a9fda08236602921c8c8/aiohttp-3.12.13-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e986067357550d1aaa21cfe9897fa19e680110551518a5a7cf44e6c5638cb8b5", size = 1724197, upload-time = "2025-06-14T15:14:25.692Z" }, - { url = "https://files.pythonhosted.org/packages/e7/9e/07bb8aa11eec762c6b1ff61575eeeb2657df11ab3d3abfa528d95f3e9337/aiohttp-3.12.13-cp312-cp312-win32.whl", hash = "sha256:ac941a80aeea2aaae2875c9500861a3ba356f9ff17b9cb2dbfb5cbf91baaf5bf", size = 421771, upload-time = "2025-06-14T15:14:27.364Z" }, - { url = "https://files.pythonhosted.org/packages/52/66/3ce877e56ec0813069cdc9607cd979575859c597b6fb9b4182c6d5f31886/aiohttp-3.12.13-cp312-cp312-win_amd64.whl", hash = "sha256:671f41e6146a749b6c81cb7fd07f5a8356d46febdaaaf07b0e774ff04830461e", size = 447869, upload-time = "2025-06-14T15:14:29.05Z" }, - { url = "https://files.pythonhosted.org/packages/11/0f/db19abdf2d86aa1deec3c1e0e5ea46a587b97c07a16516b6438428b3a3f8/aiohttp-3.12.13-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d4a18e61f271127465bdb0e8ff36e8f02ac4a32a80d8927aa52371e93cd87938", size = 694910, upload-time = "2025-06-14T15:14:30.604Z" }, - { url = "https://files.pythonhosted.org/packages/d5/81/0ab551e1b5d7f1339e2d6eb482456ccbe9025605b28eed2b1c0203aaaade/aiohttp-3.12.13-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:532542cb48691179455fab429cdb0d558b5e5290b033b87478f2aa6af5d20ace", size = 472566, upload-time = "2025-06-14T15:14:32.275Z" }, - { url = 
"https://files.pythonhosted.org/packages/34/3f/6b7d336663337672d29b1f82d1f252ec1a040fe2d548f709d3f90fa2218a/aiohttp-3.12.13-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d7eea18b52f23c050ae9db5d01f3d264ab08f09e7356d6f68e3f3ac2de9dfabb", size = 464856, upload-time = "2025-06-14T15:14:34.132Z" }, - { url = "https://files.pythonhosted.org/packages/26/7f/32ca0f170496aa2ab9b812630fac0c2372c531b797e1deb3deb4cea904bd/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad7c8e5c25f2a26842a7c239de3f7b6bfb92304593ef997c04ac49fb703ff4d7", size = 1703683, upload-time = "2025-06-14T15:14:36.034Z" }, - { url = "https://files.pythonhosted.org/packages/ec/53/d5513624b33a811c0abea8461e30a732294112318276ce3dbf047dbd9d8b/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6af355b483e3fe9d7336d84539fef460120c2f6e50e06c658fe2907c69262d6b", size = 1684946, upload-time = "2025-06-14T15:14:38Z" }, - { url = "https://files.pythonhosted.org/packages/37/72/4c237dd127827b0247dc138d3ebd49c2ded6114c6991bbe969058575f25f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a95cf9f097498f35c88e3609f55bb47b28a5ef67f6888f4390b3d73e2bac6177", size = 1737017, upload-time = "2025-06-14T15:14:39.951Z" }, - { url = "https://files.pythonhosted.org/packages/0d/67/8a7eb3afa01e9d0acc26e1ef847c1a9111f8b42b82955fcd9faeb84edeb4/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8ed8c38a1c584fe99a475a8f60eefc0b682ea413a84c6ce769bb19a7ff1c5ef", size = 1786390, upload-time = "2025-06-14T15:14:42.151Z" }, - { url = "https://files.pythonhosted.org/packages/48/19/0377df97dd0176ad23cd8cad4fd4232cfeadcec6c1b7f036315305c98e3f/aiohttp-3.12.13-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a0b9170d5d800126b5bc89d3053a2363406d6e327afb6afaeda2d19ee8bb103", size = 1708719, upload-time = 
"2025-06-14T15:14:44.039Z" }, - { url = "https://files.pythonhosted.org/packages/61/97/ade1982a5c642b45f3622255173e40c3eed289c169f89d00eeac29a89906/aiohttp-3.12.13-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:372feeace612ef8eb41f05ae014a92121a512bd5067db8f25101dd88a8db11da", size = 1622424, upload-time = "2025-06-14T15:14:45.945Z" }, - { url = "https://files.pythonhosted.org/packages/99/ab/00ad3eea004e1d07ccc406e44cfe2b8da5acb72f8c66aeeb11a096798868/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a946d3702f7965d81f7af7ea8fb03bb33fe53d311df48a46eeca17e9e0beed2d", size = 1675447, upload-time = "2025-06-14T15:14:47.911Z" }, - { url = "https://files.pythonhosted.org/packages/3f/fe/74e5ce8b2ccaba445fe0087abc201bfd7259431d92ae608f684fcac5d143/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a0c4725fae86555bbb1d4082129e21de7264f4ab14baf735278c974785cd2041", size = 1707110, upload-time = "2025-06-14T15:14:50.334Z" }, - { url = "https://files.pythonhosted.org/packages/ef/c4/39af17807f694f7a267bd8ab1fbacf16ad66740862192a6c8abac2bff813/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:9b28ea2f708234f0a5c44eb6c7d9eb63a148ce3252ba0140d050b091b6e842d1", size = 1649706, upload-time = "2025-06-14T15:14:52.378Z" }, - { url = "https://files.pythonhosted.org/packages/38/e8/f5a0a5f44f19f171d8477059aa5f28a158d7d57fe1a46c553e231f698435/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d4f5becd2a5791829f79608c6f3dc745388162376f310eb9c142c985f9441cc1", size = 1725839, upload-time = "2025-06-14T15:14:54.617Z" }, - { url = "https://files.pythonhosted.org/packages/fd/ac/81acc594c7f529ef4419d3866913f628cd4fa9cab17f7bf410a5c3c04c53/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:60f2ce6b944e97649051d5f5cc0f439360690b73909230e107fd45a359d3e911", size = 1759311, upload-time = "2025-06-14T15:14:56.597Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/0d/aabe636bd25c6ab7b18825e5a97d40024da75152bec39aa6ac8b7a677630/aiohttp-3.12.13-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:69fc1909857401b67bf599c793f2183fbc4804717388b0b888f27f9929aa41f3", size = 1708202, upload-time = "2025-06-14T15:14:58.598Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ab/561ef2d8a223261683fb95a6283ad0d36cb66c87503f3a7dde7afe208bb2/aiohttp-3.12.13-cp313-cp313-win32.whl", hash = "sha256:7d7e68787a2046b0e44ba5587aa723ce05d711e3a3665b6b7545328ac8e3c0dd", size = 420794, upload-time = "2025-06-14T15:15:00.939Z" }, - { url = "https://files.pythonhosted.org/packages/9d/47/b11d0089875a23bff0abd3edb5516bcd454db3fefab8604f5e4b07bd6210/aiohttp-3.12.13-cp313-cp313-win_amd64.whl", hash = "sha256:5a178390ca90419bfd41419a809688c368e63c86bd725e1186dd97f6b89c2706", size = 446735, upload-time = "2025-06-14T15:15:02.858Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/9b/e7/d92a237d8802ca88483906c388f7c201bbe96cd80a165ffd0ac2f6a8d59f/aiohttp-3.12.15.tar.gz", hash = "sha256:4fc61385e9c98d72fcdf47e6dd81833f47b2f77c114c29cd64a361be57a763a2", size = 7823716, upload-time = "2025-07-29T05:52:32.215Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/63/97/77cb2450d9b35f517d6cf506256bf4f5bda3f93a66b4ad64ba7fc917899c/aiohttp-3.12.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:802d3868f5776e28f7bf69d349c26fc0efadb81676d0afa88ed00d98a26340b7", size = 702333, upload-time = "2025-07-29T05:50:46.507Z" }, + { url = "https://files.pythonhosted.org/packages/83/6d/0544e6b08b748682c30b9f65640d006e51f90763b41d7c546693bc22900d/aiohttp-3.12.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2800614cd560287be05e33a679638e586a2d7401f4ddf99e304d98878c29444", size = 476948, upload-time = "2025-07-29T05:50:48.067Z" }, + { url = 
"https://files.pythonhosted.org/packages/3a/1d/c8c40e611e5094330284b1aea8a4b02ca0858f8458614fa35754cab42b9c/aiohttp-3.12.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8466151554b593909d30a0a125d638b4e5f3836e5aecde85b66b80ded1cb5b0d", size = 469787, upload-time = "2025-07-29T05:50:49.669Z" }, + { url = "https://files.pythonhosted.org/packages/38/7d/b76438e70319796bfff717f325d97ce2e9310f752a267bfdf5192ac6082b/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e5a495cb1be69dae4b08f35a6c4579c539e9b5706f606632102c0f855bcba7c", size = 1716590, upload-time = "2025-07-29T05:50:51.368Z" }, + { url = "https://files.pythonhosted.org/packages/79/b1/60370d70cdf8b269ee1444b390cbd72ce514f0d1cd1a715821c784d272c9/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6404dfc8cdde35c69aaa489bb3542fb86ef215fc70277c892be8af540e5e21c0", size = 1699241, upload-time = "2025-07-29T05:50:53.628Z" }, + { url = "https://files.pythonhosted.org/packages/a3/2b/4968a7b8792437ebc12186db31523f541943e99bda8f30335c482bea6879/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ead1c00f8521a5c9070fcb88f02967b1d8a0544e6d85c253f6968b785e1a2ab", size = 1754335, upload-time = "2025-07-29T05:50:55.394Z" }, + { url = "https://files.pythonhosted.org/packages/fb/c1/49524ed553f9a0bec1a11fac09e790f49ff669bcd14164f9fab608831c4d/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6990ef617f14450bc6b34941dba4f12d5613cbf4e33805932f853fbd1cf18bfb", size = 1800491, upload-time = "2025-07-29T05:50:57.202Z" }, + { url = "https://files.pythonhosted.org/packages/de/5e/3bf5acea47a96a28c121b167f5ef659cf71208b19e52a88cdfa5c37f1fcc/aiohttp-3.12.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd736ed420f4db2b8148b52b46b88ed038d0354255f9a73196b7bbce3ea97545", size = 1719929, upload-time = 
"2025-07-29T05:50:59.192Z" }, + { url = "https://files.pythonhosted.org/packages/39/94/8ae30b806835bcd1cba799ba35347dee6961a11bd507db634516210e91d8/aiohttp-3.12.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c5092ce14361a73086b90c6efb3948ffa5be2f5b6fbcf52e8d8c8b8848bb97c", size = 1635733, upload-time = "2025-07-29T05:51:01.394Z" }, + { url = "https://files.pythonhosted.org/packages/7a/46/06cdef71dd03acd9da7f51ab3a9107318aee12ad38d273f654e4f981583a/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aaa2234bb60c4dbf82893e934d8ee8dea30446f0647e024074237a56a08c01bd", size = 1696790, upload-time = "2025-07-29T05:51:03.657Z" }, + { url = "https://files.pythonhosted.org/packages/02/90/6b4cfaaf92ed98d0ec4d173e78b99b4b1a7551250be8937d9d67ecb356b4/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6d86a2fbdd14192e2f234a92d3b494dd4457e683ba07e5905a0b3ee25389ac9f", size = 1718245, upload-time = "2025-07-29T05:51:05.911Z" }, + { url = "https://files.pythonhosted.org/packages/2e/e6/2593751670fa06f080a846f37f112cbe6f873ba510d070136a6ed46117c6/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a041e7e2612041a6ddf1c6a33b883be6a421247c7afd47e885969ee4cc58bd8d", size = 1658899, upload-time = "2025-07-29T05:51:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/8f/28/c15bacbdb8b8eb5bf39b10680d129ea7410b859e379b03190f02fa104ffd/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5015082477abeafad7203757ae44299a610e89ee82a1503e3d4184e6bafdd519", size = 1738459, upload-time = "2025-07-29T05:51:09.56Z" }, + { url = "https://files.pythonhosted.org/packages/00/de/c269cbc4faa01fb10f143b1670633a8ddd5b2e1ffd0548f7aa49cb5c70e2/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56822ff5ddfd1b745534e658faba944012346184fbfe732e0d6134b744516eea", size = 1766434, upload-time = "2025-07-29T05:51:11.423Z" }, + { url = 
"https://files.pythonhosted.org/packages/52/b0/4ff3abd81aa7d929b27d2e1403722a65fc87b763e3a97b3a2a494bfc63bc/aiohttp-3.12.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2acbbfff69019d9014508c4ba0401822e8bae5a5fdc3b6814285b71231b60f3", size = 1726045, upload-time = "2025-07-29T05:51:13.689Z" }, + { url = "https://files.pythonhosted.org/packages/71/16/949225a6a2dd6efcbd855fbd90cf476052e648fb011aa538e3b15b89a57a/aiohttp-3.12.15-cp312-cp312-win32.whl", hash = "sha256:d849b0901b50f2185874b9a232f38e26b9b3d4810095a7572eacea939132d4e1", size = 423591, upload-time = "2025-07-29T05:51:15.452Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d8/fa65d2a349fe938b76d309db1a56a75c4fb8cc7b17a398b698488a939903/aiohttp-3.12.15-cp312-cp312-win_amd64.whl", hash = "sha256:b390ef5f62bb508a9d67cb3bba9b8356e23b3996da7062f1a57ce1a79d2b3d34", size = 450266, upload-time = "2025-07-29T05:51:17.239Z" }, + { url = "https://files.pythonhosted.org/packages/f2/33/918091abcf102e39d15aba2476ad9e7bd35ddb190dcdd43a854000d3da0d/aiohttp-3.12.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9f922ffd05034d439dde1c77a20461cf4a1b0831e6caa26151fe7aa8aaebc315", size = 696741, upload-time = "2025-07-29T05:51:19.021Z" }, + { url = "https://files.pythonhosted.org/packages/b5/2a/7495a81e39a998e400f3ecdd44a62107254803d1681d9189be5c2e4530cd/aiohttp-3.12.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ee8a8ac39ce45f3e55663891d4b1d15598c157b4d494a4613e704c8b43112cd", size = 474407, upload-time = "2025-07-29T05:51:21.165Z" }, + { url = "https://files.pythonhosted.org/packages/49/fc/a9576ab4be2dcbd0f73ee8675d16c707cfc12d5ee80ccf4015ba543480c9/aiohttp-3.12.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3eae49032c29d356b94eee45a3f39fdf4b0814b397638c2f718e96cfadf4c4e4", size = 466703, upload-time = "2025-07-29T05:51:22.948Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/2f/d4bcc8448cf536b2b54eed48f19682031ad182faa3a3fee54ebe5b156387/aiohttp-3.12.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b97752ff12cc12f46a9b20327104448042fce5c33a624f88c18f66f9368091c7", size = 1705532, upload-time = "2025-07-29T05:51:25.211Z" }, + { url = "https://files.pythonhosted.org/packages/f1/f3/59406396083f8b489261e3c011aa8aee9df360a96ac8fa5c2e7e1b8f0466/aiohttp-3.12.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:894261472691d6fe76ebb7fcf2e5870a2ac284c7406ddc95823c8598a1390f0d", size = 1686794, upload-time = "2025-07-29T05:51:27.145Z" }, + { url = "https://files.pythonhosted.org/packages/dc/71/164d194993a8d114ee5656c3b7ae9c12ceee7040d076bf7b32fb98a8c5c6/aiohttp-3.12.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5fa5d9eb82ce98959fc1031c28198b431b4d9396894f385cb63f1e2f3f20ca6b", size = 1738865, upload-time = "2025-07-29T05:51:29.366Z" }, + { url = "https://files.pythonhosted.org/packages/1c/00/d198461b699188a93ead39cb458554d9f0f69879b95078dce416d3209b54/aiohttp-3.12.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0fa751efb11a541f57db59c1dd821bec09031e01452b2b6217319b3a1f34f3d", size = 1788238, upload-time = "2025-07-29T05:51:31.285Z" }, + { url = "https://files.pythonhosted.org/packages/85/b8/9e7175e1fa0ac8e56baa83bf3c214823ce250d0028955dfb23f43d5e61fd/aiohttp-3.12.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5346b93e62ab51ee2a9d68e8f73c7cf96ffb73568a23e683f931e52450e4148d", size = 1710566, upload-time = "2025-07-29T05:51:33.219Z" }, + { url = "https://files.pythonhosted.org/packages/59/e4/16a8eac9df39b48ae102ec030fa9f726d3570732e46ba0c592aeeb507b93/aiohttp-3.12.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:049ec0360f939cd164ecbfd2873eaa432613d5e77d6b04535e3d1fbae5a9e645", size = 1624270, upload-time = "2025-07-29T05:51:35.195Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f8/cd84dee7b6ace0740908fd0af170f9fab50c2a41ccbc3806aabcb1050141/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b52dcf013b57464b6d1e51b627adfd69a8053e84b7103a7cd49c030f9ca44461", size = 1677294, upload-time = "2025-07-29T05:51:37.215Z" }, + { url = "https://files.pythonhosted.org/packages/ce/42/d0f1f85e50d401eccd12bf85c46ba84f947a84839c8a1c2c5f6e8ab1eb50/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:9b2af240143dd2765e0fb661fd0361a1b469cab235039ea57663cda087250ea9", size = 1708958, upload-time = "2025-07-29T05:51:39.328Z" }, + { url = "https://files.pythonhosted.org/packages/d5/6b/f6fa6c5790fb602538483aa5a1b86fcbad66244997e5230d88f9412ef24c/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ac77f709a2cde2cc71257ab2d8c74dd157c67a0558a0d2799d5d571b4c63d44d", size = 1651553, upload-time = "2025-07-29T05:51:41.356Z" }, + { url = "https://files.pythonhosted.org/packages/04/36/a6d36ad545fa12e61d11d1932eef273928b0495e6a576eb2af04297fdd3c/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:47f6b962246f0a774fbd3b6b7be25d59b06fdb2f164cf2513097998fc6a29693", size = 1727688, upload-time = "2025-07-29T05:51:43.452Z" }, + { url = "https://files.pythonhosted.org/packages/aa/c8/f195e5e06608a97a4e52c5d41c7927301bf757a8e8bb5bbf8cef6c314961/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:760fb7db442f284996e39cf9915a94492e1896baac44f06ae551974907922b64", size = 1761157, upload-time = "2025-07-29T05:51:45.643Z" }, + { url = "https://files.pythonhosted.org/packages/05/6a/ea199e61b67f25ba688d3ce93f63b49b0a4e3b3d380f03971b4646412fc6/aiohttp-3.12.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad702e57dc385cae679c39d318def49aef754455f237499d5b99bea4ef582e51", size = 1710050, upload-time = 
"2025-07-29T05:51:48.203Z" }, + { url = "https://files.pythonhosted.org/packages/b4/2e/ffeb7f6256b33635c29dbed29a22a723ff2dd7401fff42ea60cf2060abfb/aiohttp-3.12.15-cp313-cp313-win32.whl", hash = "sha256:f813c3e9032331024de2eb2e32a88d86afb69291fbc37a3a3ae81cc9917fb3d0", size = 422647, upload-time = "2025-07-29T05:51:50.718Z" }, + { url = "https://files.pythonhosted.org/packages/1b/8e/78ee35774201f38d5e1ba079c9958f7629b1fd079459aea9467441dbfbf5/aiohttp-3.12.15-cp313-cp313-win_amd64.whl", hash = "sha256:1a649001580bdb37c6fdb1bebbd7e3bc688e8ec2b5c6f52edbb664662b17dc84", size = 449067, upload-time = "2025-07-29T05:51:52.549Z" }, ] [[package]] @@ -137,23 +182,15 @@ wheels = [ [[package]] name = "aiosignal" -version = "1.3.2" +version = "1.4.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "frozenlist" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ba/b5/6d55e80f6d8a08ce22b982eafa278d823b541c925f11ee774b0b9c43473d/aiosignal-1.3.2.tar.gz", hash = "sha256:a8c255c66fafb1e499c9351d0bf32ff2d8a0321595ebac3b93713656d2436f54", size = 19424, upload-time = "2024-12-13T17:10:40.86Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597, upload-time = "2024-12-13T17:10:38.469Z" }, -] - -[[package]] -name = "airportsdata" -version = "20250622" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/40/98/5f055f0d3fce23411948c30b6be359bb28821c4919069b2bea3f2af78d70/airportsdata-20250622.tar.gz", hash = "sha256:7adaa4cffdc6e8122d16a63e958ab1eb0b2e57e8c1bf0d10b8218f64067550e6", size = 903216, upload-time = "2025-06-22T06:55:49.743Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/61/62/06741b579156360248d1ec624842ad0edf697050bbaf7c3e46394e106ad1/aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7", size = 25007, upload-time = "2025-07-03T22:54:43.528Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/10/24/4daf2c931855ffd9c3fe8cbea133ca0689afaed9d8c0b04597f8e074d79e/airportsdata-20250622-py3-none-any.whl", hash = "sha256:80954c0109bb05fda7c745a1f7ed1d91c29c5fc196ce9b39aa6e8b43617bac4c", size = 912712, upload-time = "2025-06-22T06:55:47.639Z" }, + { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] [[package]] @@ -205,16 +242,16 @@ sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d [[package]] name = "anyio" -version = "4.9.0" +version = "4.10.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "sniffio" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949, upload-time = "2025-03-17T00:02:54.77Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/b4/636b3b65173d3ce9a38ef5f0522789614e590dab6a8d505340a4efe4c567/anyio-4.10.0.tar.gz", hash = "sha256:3f3fae35c96039744587aa5b8371e7e8e603c0702999535961dd336026973ba6", size = 213252, upload-time = "2025-08-04T08:54:26.451Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = 
"sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916, upload-time = "2025-03-17T00:02:52.713Z" }, + { url = "https://files.pythonhosted.org/packages/6f/12/e5e0282d673bb9746bacfb6e2dba8719989d3660cdb2ea79aee9a9651afb/anyio-4.10.0-py3-none-any.whl", hash = "sha256:60e474ac86736bbfd6f210f7a61218939c318f43f9972497381f1c5e930ed3d1", size = 107213, upload-time = "2025-08-04T08:54:24.882Z" }, ] [[package]] @@ -228,11 +265,23 @@ wheels = [ [[package]] name = "astroid" -version = "3.3.10" +version = "3.3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/74/dfb75f9ccd592bbedb175d4a32fc643cf569d7c218508bfbd6ea7ef9c091/astroid-3.3.11.tar.gz", hash = "sha256:1e5a5011af2920c7c67a53f65d536d65bfa7116feeaf2354d8b94f29573bb0ce", size = 400439, upload-time = "2025-07-13T18:04:23.177Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/0f/3b8fdc946b4d9cc8cc1e8af42c4e409468c84441b933d037e101b3d72d86/astroid-3.3.11-py3-none-any.whl", hash = "sha256:54c760ae8322ece1abd213057c4b5bba7c49818853fc901ef09719a60dbf9dec", size = 275612, upload-time = "2025-07-13T18:04:21.07Z" }, +] + +[[package]] +name = "asttokens" +version = "2.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/00/c2/9b2de9ed027f9fe5734a6c0c0a601289d796b3caaf1e372e23fa88a73047/astroid-3.3.10.tar.gz", hash = "sha256:c332157953060c6deb9caa57303ae0d20b0fbdb2e59b4a4f2a6ba49d0a7961ce", size = 398941, upload-time = "2025-05-10T13:33:10.405Z" } +dependencies = [ + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/45/1d/f03bcb60c4a3212e15f99a56085d93093a497718adf828d050b9d675da81/asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0", size = 62284, upload-time = "2023-10-26T10:03:05.06Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/15/58/5260205b9968c20b6457ed82f48f9e3d6edf2f1f95103161798b73aeccf0/astroid-3.3.10-py3-none-any.whl", hash = "sha256:104fb9cb9b27ea95e847a94c003be03a9e039334a8ebca5ee27dafaf5c5711eb", size = 275388, upload-time = "2025-05-10T13:33:08.391Z" }, + { url = "https://files.pythonhosted.org/packages/45/86/4736ac618d82a20d87d2f92ae19441ebc7ac9e7a581d7e58bbe79233b24a/asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24", size = 27764, upload-time = "2023-10-26T10:03:01.789Z" }, ] [[package]] @@ -244,6 +293,88 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, ] +[[package]] +name = "audioop-lts" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/53/946db57842a50b2da2e0c1e34bd37f36f5aadba1a929a3971c5d7841dbca/audioop_lts-0.2.2.tar.gz", hash = "sha256:64d0c62d88e67b98a1a5e71987b7aa7b5bcffc7dcee65b635823dbdd0a8dbbd0", size = 30686, upload-time = "2025-08-05T16:43:17.409Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/d4/94d277ca941de5a507b07f0b592f199c22454eeaec8f008a286b3fbbacd6/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_universal2.whl", hash = "sha256:fd3d4602dc64914d462924a08c1a9816435a2155d74f325853c1f1ac3b2d9800", size = 46523, upload-time = "2025-08-05T16:42:20.836Z" }, + { url = "https://files.pythonhosted.org/packages/f8/5a/656d1c2da4b555920ce4177167bfeb8623d98765594af59702c8873f60ec/audioop_lts-0.2.2-cp313-abi3-macosx_10_13_x86_64.whl", hash = "sha256:550c114a8df0aafe9a05442a1162dfc8fec37e9af1d625ae6060fed6e756f303", size = 27455, upload-time = "2025-08-05T16:42:22.283Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/83/ea581e364ce7b0d41456fb79d6ee0ad482beda61faf0cab20cbd4c63a541/audioop_lts-0.2.2-cp313-abi3-macosx_11_0_arm64.whl", hash = "sha256:9a13dc409f2564de15dd68be65b462ba0dde01b19663720c68c1140c782d1d75", size = 26997, upload-time = "2025-08-05T16:42:23.849Z" }, + { url = "https://files.pythonhosted.org/packages/b8/3b/e8964210b5e216e5041593b7d33e97ee65967f17c282e8510d19c666dab4/audioop_lts-0.2.2-cp313-abi3-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:51c916108c56aa6e426ce611946f901badac950ee2ddaf302b7ed35d9958970d", size = 85844, upload-time = "2025-08-05T16:42:25.208Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2e/0a1c52faf10d51def20531a59ce4c706cb7952323b11709e10de324d6493/audioop_lts-0.2.2-cp313-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:47eba38322370347b1c47024defbd36374a211e8dd5b0dcbce7b34fdb6f8847b", size = 85056, upload-time = "2025-08-05T16:42:26.559Z" }, + { url = "https://files.pythonhosted.org/packages/75/e8/cd95eef479656cb75ab05dfece8c1f8c395d17a7c651d88f8e6e291a63ab/audioop_lts-0.2.2-cp313-abi3-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ba7c3a7e5f23e215cb271516197030c32aef2e754252c4c70a50aaff7031a2c8", size = 93892, upload-time = "2025-08-05T16:42:27.902Z" }, + { url = "https://files.pythonhosted.org/packages/5c/1e/a0c42570b74f83efa5cca34905b3eef03f7ab09fe5637015df538a7f3345/audioop_lts-0.2.2-cp313-abi3-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:def246fe9e180626731b26e89816e79aae2276f825420a07b4a647abaa84becc", size = 96660, upload-time = "2025-08-05T16:42:28.9Z" }, + { url = "https://files.pythonhosted.org/packages/50/d5/8a0ae607ca07dbb34027bac8db805498ee7bfecc05fd2c148cc1ed7646e7/audioop_lts-0.2.2-cp313-abi3-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:e160bf9df356d841bb6c180eeeea1834085464626dc1b68fa4e1d59070affdc3", size = 79143, upload-time = "2025-08-05T16:42:29.929Z" }, + { url = "https://files.pythonhosted.org/packages/12/17/0d28c46179e7910bfb0bb62760ccb33edb5de973052cb2230b662c14ca2e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4b4cd51a57b698b2d06cb9993b7ac8dfe89a3b2878e96bc7948e9f19ff51dba6", size = 84313, upload-time = "2025-08-05T16:42:30.949Z" }, + { url = "https://files.pythonhosted.org/packages/84/ba/bd5d3806641564f2024e97ca98ea8f8811d4e01d9b9f9831474bc9e14f9e/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_ppc64le.whl", hash = "sha256:4a53aa7c16a60a6857e6b0b165261436396ef7293f8b5c9c828a3a203147ed4a", size = 93044, upload-time = "2025-08-05T16:42:31.959Z" }, + { url = "https://files.pythonhosted.org/packages/f9/5e/435ce8d5642f1f7679540d1e73c1c42d933331c0976eb397d1717d7f01a3/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_riscv64.whl", hash = "sha256:3fc38008969796f0f689f1453722a0f463da1b8a6fbee11987830bfbb664f623", size = 78766, upload-time = "2025-08-05T16:42:33.302Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3b/b909e76b606cbfd53875693ec8c156e93e15a1366a012f0b7e4fb52d3c34/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_s390x.whl", hash = "sha256:15ab25dd3e620790f40e9ead897f91e79c0d3ce65fe193c8ed6c26cffdd24be7", size = 87640, upload-time = "2025-08-05T16:42:34.854Z" }, + { url = "https://files.pythonhosted.org/packages/30/e7/8f1603b4572d79b775f2140d7952f200f5e6c62904585d08a01f0a70393a/audioop_lts-0.2.2-cp313-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:03f061a1915538fd96272bac9551841859dbb2e3bf73ebe4a23ef043766f5449", size = 86052, upload-time = "2025-08-05T16:42:35.839Z" }, + { url = "https://files.pythonhosted.org/packages/b5/96/c37846df657ccdda62ba1ae2b6534fa90e2e1b1742ca8dcf8ebd38c53801/audioop_lts-0.2.2-cp313-abi3-win32.whl", hash = "sha256:3bcddaaf6cc5935a300a8387c99f7a7fbbe212a11568ec6cf6e4bc458c048636", size = 26185, upload-time = "2025-08-05T16:42:37.04Z" 
}, + { url = "https://files.pythonhosted.org/packages/34/a5/9d78fdb5b844a83da8a71226c7bdae7cc638861085fff7a1d707cb4823fa/audioop_lts-0.2.2-cp313-abi3-win_amd64.whl", hash = "sha256:a2c2a947fae7d1062ef08c4e369e0ba2086049a5e598fda41122535557012e9e", size = 30503, upload-time = "2025-08-05T16:42:38.427Z" }, + { url = "https://files.pythonhosted.org/packages/34/25/20d8fde083123e90c61b51afb547bb0ea7e77bab50d98c0ab243d02a0e43/audioop_lts-0.2.2-cp313-abi3-win_arm64.whl", hash = "sha256:5f93a5db13927a37d2d09637ccca4b2b6b48c19cd9eda7b17a2e9f77edee6a6f", size = 24173, upload-time = "2025-08-05T16:42:39.704Z" }, + { url = "https://files.pythonhosted.org/packages/58/a7/0a764f77b5c4ac58dc13c01a580f5d32ae8c74c92020b961556a43e26d02/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:73f80bf4cd5d2ca7814da30a120de1f9408ee0619cc75da87d0641273d202a09", size = 47096, upload-time = "2025-08-05T16:42:40.684Z" }, + { url = "https://files.pythonhosted.org/packages/aa/ed/ebebedde1a18848b085ad0fa54b66ceb95f1f94a3fc04f1cd1b5ccb0ed42/audioop_lts-0.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:106753a83a25ee4d6f473f2be6b0966fc1c9af7e0017192f5531a3e7463dce58", size = 27748, upload-time = "2025-08-05T16:42:41.992Z" }, + { url = "https://files.pythonhosted.org/packages/cb/6e/11ca8c21af79f15dbb1c7f8017952ee8c810c438ce4e2b25638dfef2b02c/audioop_lts-0.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fbdd522624141e40948ab3e8cdae6e04c748d78710e9f0f8d4dae2750831de19", size = 27329, upload-time = "2025-08-05T16:42:42.987Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/0022f93d56d85eec5da6b9da6a958a1ef09e80c39f2cc0a590c6af81dcbb/audioop_lts-0.2.2-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:143fad0311e8209ece30a8dbddab3b65ab419cbe8c0dde6e8828da25999be911", size = 92407, upload-time = "2025-08-05T16:42:44.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/1d/48a889855e67be8718adbc7a01f3c01d5743c325453a5e81cf3717664aad/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dfbbc74ec68a0fd08cfec1f4b5e8cca3d3cd7de5501b01c4b5d209995033cde9", size = 91811, upload-time = "2025-08-05T16:42:45.325Z" }, + { url = "https://files.pythonhosted.org/packages/98/a6/94b7213190e8077547ffae75e13ed05edc488653c85aa5c41472c297d295/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cfcac6aa6f42397471e4943e0feb2244549db5c5d01efcd02725b96af417f3fe", size = 100470, upload-time = "2025-08-05T16:42:46.468Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e9/78450d7cb921ede0cfc33426d3a8023a3bda755883c95c868ee36db8d48d/audioop_lts-0.2.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:752d76472d9804ac60f0078c79cdae8b956f293177acd2316cd1e15149aee132", size = 103878, upload-time = "2025-08-05T16:42:47.576Z" }, + { url = "https://files.pythonhosted.org/packages/4f/e2/cd5439aad4f3e34ae1ee852025dc6aa8f67a82b97641e390bf7bd9891d3e/audioop_lts-0.2.2-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:83c381767e2cc10e93e40281a04852facc4cd9334550e0f392f72d1c0a9c5753", size = 84867, upload-time = "2025-08-05T16:42:49.003Z" }, + { url = "https://files.pythonhosted.org/packages/68/4b/9d853e9076c43ebba0d411e8d2aa19061083349ac695a7d082540bad64d0/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c0022283e9556e0f3643b7c3c03f05063ca72b3063291834cca43234f20c60bb", size = 90001, upload-time = "2025-08-05T16:42:50.038Z" }, + { url = "https://files.pythonhosted.org/packages/58/26/4bae7f9d2f116ed5593989d0e521d679b0d583973d203384679323d8fa85/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:a2d4f1513d63c795e82948e1305f31a6d530626e5f9f2605408b300ae6095093", size = 
99046, upload-time = "2025-08-05T16:42:51.111Z" }, + { url = "https://files.pythonhosted.org/packages/b2/67/a9f4fb3e250dda9e9046f8866e9fa7d52664f8985e445c6b4ad6dfb55641/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:c9c8e68d8b4a56fda8c025e538e639f8c5953f5073886b596c93ec9b620055e7", size = 84788, upload-time = "2025-08-05T16:42:52.198Z" }, + { url = "https://files.pythonhosted.org/packages/70/f7/3de86562db0121956148bcb0fe5b506615e3bcf6e63c4357a612b910765a/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:96f19de485a2925314f5020e85911fb447ff5fbef56e8c7c6927851b95533a1c", size = 94472, upload-time = "2025-08-05T16:42:53.59Z" }, + { url = "https://files.pythonhosted.org/packages/f1/32/fd772bf9078ae1001207d2df1eef3da05bea611a87dd0e8217989b2848fa/audioop_lts-0.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e541c3ef484852ef36545f66209444c48b28661e864ccadb29daddb6a4b8e5f5", size = 92279, upload-time = "2025-08-05T16:42:54.632Z" }, + { url = "https://files.pythonhosted.org/packages/4f/41/affea7181592ab0ab560044632571a38edaf9130b84928177823fbf3176a/audioop_lts-0.2.2-cp313-cp313t-win32.whl", hash = "sha256:d5e73fa573e273e4f2e5ff96f9043858a5e9311e94ffefd88a3186a910c70917", size = 26568, upload-time = "2025-08-05T16:42:55.627Z" }, + { url = "https://files.pythonhosted.org/packages/28/2b/0372842877016641db8fc54d5c88596b542eec2f8f6c20a36fb6612bf9ee/audioop_lts-0.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9191d68659eda01e448188f60364c7763a7ca6653ed3f87ebb165822153a8547", size = 30942, upload-time = "2025-08-05T16:42:56.674Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/baf2b9cc7e96c179bb4a54f30fcd83e6ecb340031bde68f486403f943768/audioop_lts-0.2.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c174e322bb5783c099aaf87faeb240c8d210686b04bd61dfd05a8e5a83d88969", size = 24603, upload-time = "2025-08-05T16:42:57.571Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/73/413b5a2804091e2c7d5def1d618e4837f1cb82464e230f827226278556b7/audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:f9ee9b52f5f857fbaf9d605a360884f034c92c1c23021fb90b2e39b8e64bede6", size = 47104, upload-time = "2025-08-05T16:42:58.518Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8c/daa3308dc6593944410c2c68306a5e217f5c05b70a12e70228e7dd42dc5c/audioop_lts-0.2.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:49ee1a41738a23e98d98b937a0638357a2477bc99e61b0f768a8f654f45d9b7a", size = 27754, upload-time = "2025-08-05T16:43:00.132Z" }, + { url = "https://files.pythonhosted.org/packages/4e/86/c2e0f627168fcf61781a8f72cab06b228fe1da4b9fa4ab39cfb791b5836b/audioop_lts-0.2.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5b00be98ccd0fc123dcfad31d50030d25fcf31488cde9e61692029cd7394733b", size = 27332, upload-time = "2025-08-05T16:43:01.666Z" }, + { url = "https://files.pythonhosted.org/packages/c7/bd/35dce665255434f54e5307de39e31912a6f902d4572da7c37582809de14f/audioop_lts-0.2.2-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6d2e0f9f7a69403e388894d4ca5ada5c47230716a03f2847cfc7bd1ecb589d6", size = 92396, upload-time = "2025-08-05T16:43:02.991Z" }, + { url = "https://files.pythonhosted.org/packages/2d/d2/deeb9f51def1437b3afa35aeb729d577c04bcd89394cb56f9239a9f50b6f/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f9b0b8a03ef474f56d1a842af1a2e01398b8f7654009823c6d9e0ecff4d5cfbf", size = 91811, upload-time = "2025-08-05T16:43:04.096Z" }, + { url = "https://files.pythonhosted.org/packages/76/3b/09f8b35b227cee28cc8231e296a82759ed80c1a08e349811d69773c48426/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2b267b70747d82125f1a021506565bdc5609a2b24bcb4773c16d79d2bb260bbd", size = 100483, upload-time = 
"2025-08-05T16:43:05.085Z" }, + { url = "https://files.pythonhosted.org/packages/0b/15/05b48a935cf3b130c248bfdbdea71ce6437f5394ee8533e0edd7cfd93d5e/audioop_lts-0.2.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0337d658f9b81f4cd0fdb1f47635070cc084871a3d4646d9de74fdf4e7c3d24a", size = 103885, upload-time = "2025-08-05T16:43:06.197Z" }, + { url = "https://files.pythonhosted.org/packages/83/80/186b7fce6d35b68d3d739f228dc31d60b3412105854edb975aa155a58339/audioop_lts-0.2.2-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:167d3b62586faef8b6b2275c3218796b12621a60e43f7e9d5845d627b9c9b80e", size = 84899, upload-time = "2025-08-05T16:43:07.291Z" }, + { url = "https://files.pythonhosted.org/packages/49/89/c78cc5ac6cb5828f17514fb12966e299c850bc885e80f8ad94e38d450886/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0d9385e96f9f6da847f4d571ce3cb15b5091140edf3db97276872647ce37efd7", size = 89998, upload-time = "2025-08-05T16:43:08.335Z" }, + { url = "https://files.pythonhosted.org/packages/4c/4b/6401888d0c010e586c2ca50fce4c903d70a6bb55928b16cfbdfd957a13da/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:48159d96962674eccdca9a3df280e864e8ac75e40a577cc97c5c42667ffabfc5", size = 99046, upload-time = "2025-08-05T16:43:09.367Z" }, + { url = "https://files.pythonhosted.org/packages/de/f8/c874ca9bb447dae0e2ef2e231f6c4c2b0c39e31ae684d2420b0f9e97ee68/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:8fefe5868cd082db1186f2837d64cfbfa78b548ea0d0543e9b28935ccce81ce9", size = 84843, upload-time = "2025-08-05T16:43:10.749Z" }, + { url = "https://files.pythonhosted.org/packages/3e/c0/0323e66f3daebc13fd46b36b30c3be47e3fc4257eae44f1e77eb828c703f/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:58cf54380c3884fb49fdd37dfb7a772632b6701d28edd3e2904743c5e1773602", size = 94490, upload-time = "2025-08-05T16:43:12.131Z" }, + { 
url = "https://files.pythonhosted.org/packages/98/6b/acc7734ac02d95ab791c10c3f17ffa3584ccb9ac5c18fd771c638ed6d1f5/audioop_lts-0.2.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:088327f00488cdeed296edd9215ca159f3a5a5034741465789cad403fcf4bec0", size = 92297, upload-time = "2025-08-05T16:43:13.139Z" }, + { url = "https://files.pythonhosted.org/packages/13/c3/c3dc3f564ce6877ecd2a05f8d751b9b27a8c320c2533a98b0c86349778d0/audioop_lts-0.2.2-cp314-cp314t-win32.whl", hash = "sha256:068aa17a38b4e0e7de771c62c60bbca2455924b67a8814f3b0dee92b5820c0b3", size = 27331, upload-time = "2025-08-05T16:43:14.19Z" }, + { url = "https://files.pythonhosted.org/packages/72/bb/b4608537e9ffcb86449091939d52d24a055216a36a8bf66b936af8c3e7ac/audioop_lts-0.2.2-cp314-cp314t-win_amd64.whl", hash = "sha256:a5bf613e96f49712073de86f20dbdd4014ca18efd4d34ed18c75bd808337851b", size = 31697, upload-time = "2025-08-05T16:43:15.193Z" }, + { url = "https://files.pythonhosted.org/packages/f6/22/91616fe707a5c5510de2cac9b046a30defe7007ba8a0c04f9c08f27df312/audioop_lts-0.2.2-cp314-cp314t-win_arm64.whl", hash = "sha256:b492c3b040153e68b9fdaff5913305aaaba5bb433d8a7f73d5cf6a64ed3cc1dd", size = 25206, upload-time = "2025-08-05T16:43:16.444Z" }, +] + +[[package]] +name = "av" +version = "15.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/17/89/940a509ee7e9449f0c877fa984b37b7cc485546035cc67bbc353f2ac20f3/av-15.0.0.tar.gz", hash = "sha256:871c1a9becddf00b60b1294dc0bff9ff193ac31286aeec1a34039bd27e650183", size = 3833128, upload-time = "2025-07-03T16:23:48.455Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/81/c5d009ea9c01a513b7af6aac2ac49c0f2f7193345071cd6dd4d91bef3ab9/av-15.0.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:84e2ede9459e64e768f4bc56d9df65da9e94b704ee3eccfe2e5b1da1da754313", size = 21782026, upload-time = "2025-07-03T16:22:18.41Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/8a/ffe9fcac35a07efc6aa0d765015efa499d88823c01499f318760460f8088/av-15.0.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:9473ed92d6942c5a449a2c79d49f3425eb0272499d1a3559b32c1181ff736a08", size = 26974939, upload-time = "2025-07-03T16:22:21.493Z" }, + { url = "https://files.pythonhosted.org/packages/a0/e7/0816e52134dc2d0259bb1aaad78573eacaf2bebc1a643de34e3384b520d6/av-15.0.0-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:56a53fe4e09bebd99355eaa0ce221b681eaf205bdda114f5e17fb79f3c3746ad", size = 34573486, upload-time = "2025-07-03T16:22:24.684Z" }, + { url = "https://files.pythonhosted.org/packages/a3/f4/07cc05712e9824a4bb68beea44eb5a7369dee3f00fa258879190004b7fc5/av-15.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:247dd9a99d7ed3577b8c1e9977e811f423b04504ff36c9dcd7a4de3e6e5fe5ad", size = 38418908, upload-time = "2025-07-03T16:22:27.799Z" }, + { url = "https://files.pythonhosted.org/packages/19/48/7f3a21a41e291f8c5b8a98f95cfef308ce1b024a634413ce910c270efd7d/av-15.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:fc50a7d5f60109221ccf44f8fa4c56ce73f22948b7f19b1717fcc58f7fbc383e", size = 40010257, upload-time = "2025-07-03T16:22:31.15Z" }, + { url = "https://files.pythonhosted.org/packages/6d/c9/ced392e82d39084544d2d0c05decb36446028928eddf0d40ec3d8fe6c050/av-15.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:77deaec8943abfebd4e262924f2f452d6594cf0bc67d8d98aac0462b476e4182", size = 40381801, upload-time = "2025-07-03T16:22:34.254Z" }, + { url = "https://files.pythonhosted.org/packages/d2/73/a23ad111200e27f5773e94b0b6f9e2ea492a72ded7f4787a358d9d504a8b/av-15.0.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:601d9b0740e47a17ec96ba2a537ebfd4d6edc859ae6f298475c06caa51f0a019", size = 37219417, upload-time = "2025-07-03T16:22:37.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/0c/2ac20143b74e3792ede40bfd397ce72fa4e76a03999c2fd0aee3997b6971/av-15.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e021f67e0db7256c9f5d3d6a2a4237a4a4a804b131b33e7f2778981070519b20", size = 41242077, upload-time = "2025-07-03T16:22:40.86Z" }, + { url = "https://files.pythonhosted.org/packages/bd/30/40452705dffbfef0f5505d36218970dfeff0a86048689910219c8717b310/av-15.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:383f1b57520d790069d85fc75f43cfa32fca07f5fb3fb842be37bd596638602c", size = 31357617, upload-time = "2025-07-03T16:22:43.934Z" }, + { url = "https://files.pythonhosted.org/packages/a6/27/c2e248498ce78dd504b0b1818ce88e71e30a7e26c348bdf5d6467d7b06f7/av-15.0.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:0701c116f32bd9478023f610722f6371d15ca0c068ff228d355f54a7cf23d9cb", size = 21746400, upload-time = "2025-07-03T16:22:46.604Z" }, + { url = "https://files.pythonhosted.org/packages/1d/d8/11f8452f19f4ddc189e978b215420131db40e3919135c14a0d13520f7c94/av-15.0.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:57fb6232494ec575b8e78e5a9ef9b811d78f8d67324476ec8430ca3146751124", size = 26939576, upload-time = "2025-07-03T16:22:49.255Z" }, + { url = "https://files.pythonhosted.org/packages/00/1c/b109fd41487d91b8843f9e199b65e89ca533a612ec788b11ed0ba9812ea3/av-15.0.0-cp313-cp313-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:801a3e0afd5c36df70d012d083bfca67ab22d0ebd2c860c0d9432ac875bc0ad6", size = 34284344, upload-time = "2025-07-03T16:22:52.373Z" }, + { url = "https://files.pythonhosted.org/packages/99/71/aee35fa182d0a41227fbd3f4250fd94c54acdd2995025ee59dd948bba930/av-15.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:d5e97791b96741b344bf6dbea4fb14481c117b1f7fe8113721e8d80e26cbb388", size = 38130346, upload-time = "2025-07-03T16:22:56.755Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/c4/2d9bbc9c42a804c99bc571eeacb2fe1582fe9cfdb726616876cada937d6a/av-15.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:acb4e4aa6bb394d3a9e60feb4cb7a856fc7bac01f3c99019b1d0f11c898c682c", size = 39728857, upload-time = "2025-07-03T16:23:00.392Z" }, + { url = "https://files.pythonhosted.org/packages/7c/d6/a5746e9fb4fdf326e9897abd7538413210e66f35ad4793fe30f87859249d/av-15.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:02d2d80bdbe184f1f3f49b3f5eae7f0ff7cba0a62ab3b18be0505715e586ad29", size = 40109012, upload-time = "2025-07-03T16:23:04.1Z" }, + { url = "https://files.pythonhosted.org/packages/77/1f/da89798231ad0feacfaaea4efec4f1779060226986f97498eabe2c7c54a8/av-15.0.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:603f3ae751f6678df5d8b949f92c6f8257064bba8b3e8db606a24c29d31b4e25", size = 36929211, upload-time = "2025-07-03T16:23:07.694Z" }, + { url = "https://files.pythonhosted.org/packages/d5/4c/2bcabe65a1c19e552f03540f16155a0d02cb9b7a90d31242ab3e0c7ea0d8/av-15.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:682686a9ea2745e63c8878641ec26b1787b9210533f3e945a6e07e24ab788c2e", size = 40967172, upload-time = "2025-07-03T16:23:13.488Z" }, + { url = "https://files.pythonhosted.org/packages/c9/f0/fe14adaa670ab7a3f709805a8494fd0a2eeb6a5b18b8c59dc6014639a5b1/av-15.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:5758231163b5486dfbf664036be010b7f5ebb24564aaeb62577464be5ea996e0", size = 31332650, upload-time = "2025-07-03T16:23:16.558Z" }, +] + [[package]] name = "babel" version = "2.17.0" @@ -254,53 +385,12 @@ wheels = [ ] [[package]] -name = "bcrypt" -version = "4.3.0" +name = "backoff" +version = "2.2.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bb/5d/6d7433e0f3cd46ce0b43cd65e1db465ea024dbb8216fb2404e919c2ad77b/bcrypt-4.3.0.tar.gz", hash = "sha256:3a3fd2204178b6d2adcf09cb4f6426ffef54762577a7c9b54c159008cb288c18", size = 25697, 
upload-time = "2025-02-28T01:24:09.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/2c/3d44e853d1fe969d229bd58d39ae6902b3d924af0e2b5a60d17d4b809ded/bcrypt-4.3.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f01e060f14b6b57bbb72fc5b4a83ac21c443c9a2ee708e04a10e9192f90a6281", size = 483719, upload-time = "2025-02-28T01:22:34.539Z" }, - { url = "https://files.pythonhosted.org/packages/a1/e2/58ff6e2a22eca2e2cff5370ae56dba29d70b1ea6fc08ee9115c3ae367795/bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5eeac541cefd0bb887a371ef73c62c3cd78535e4887b310626036a7c0a817bb", size = 272001, upload-time = "2025-02-28T01:22:38.078Z" }, - { url = "https://files.pythonhosted.org/packages/37/1f/c55ed8dbe994b1d088309e366749633c9eb90d139af3c0a50c102ba68a1a/bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59e1aa0e2cd871b08ca146ed08445038f42ff75968c7ae50d2fdd7860ade2180", size = 277451, upload-time = "2025-02-28T01:22:40.787Z" }, - { url = "https://files.pythonhosted.org/packages/d7/1c/794feb2ecf22fe73dcfb697ea7057f632061faceb7dcf0f155f3443b4d79/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0042b2e342e9ae3d2ed22727c1262f76cc4f345683b5c1715f0250cf4277294f", size = 272792, upload-time = "2025-02-28T01:22:43.144Z" }, - { url = "https://files.pythonhosted.org/packages/13/b7/0b289506a3f3598c2ae2bdfa0ea66969812ed200264e3f61df77753eee6d/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74a8d21a09f5e025a9a23e7c0fd2c7fe8e7503e4d356c0a2c1486ba010619f09", size = 289752, upload-time = "2025-02-28T01:22:45.56Z" }, - { url = "https://files.pythonhosted.org/packages/dc/24/d0fb023788afe9e83cc118895a9f6c57e1044e7e1672f045e46733421fe6/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:0142b2cb84a009f8452c8c5a33ace5e3dfec4159e7735f5afe9a4d50a8ea722d", size = 277762, upload-time = "2025-02-28T01:22:47.023Z" 
}, - { url = "https://files.pythonhosted.org/packages/e4/38/cde58089492e55ac4ef6c49fea7027600c84fd23f7520c62118c03b4625e/bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:12fa6ce40cde3f0b899729dbd7d5e8811cb892d31b6f7d0334a1f37748b789fd", size = 272384, upload-time = "2025-02-28T01:22:49.221Z" }, - { url = "https://files.pythonhosted.org/packages/de/6a/d5026520843490cfc8135d03012a413e4532a400e471e6188b01b2de853f/bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:5bd3cca1f2aa5dbcf39e2aa13dd094ea181f48959e1071265de49cc2b82525af", size = 277329, upload-time = "2025-02-28T01:22:51.603Z" }, - { url = "https://files.pythonhosted.org/packages/b3/a3/4fc5255e60486466c389e28c12579d2829b28a527360e9430b4041df4cf9/bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:335a420cfd63fc5bc27308e929bee231c15c85cc4c496610ffb17923abf7f231", size = 305241, upload-time = "2025-02-28T01:22:53.283Z" }, - { url = "https://files.pythonhosted.org/packages/c7/15/2b37bc07d6ce27cc94e5b10fd5058900eb8fb11642300e932c8c82e25c4a/bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:0e30e5e67aed0187a1764911af023043b4542e70a7461ad20e837e94d23e1d6c", size = 309617, upload-time = "2025-02-28T01:22:55.461Z" }, - { url = "https://files.pythonhosted.org/packages/5f/1f/99f65edb09e6c935232ba0430c8c13bb98cb3194b6d636e61d93fe60ac59/bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b8d62290ebefd49ee0b3ce7500f5dbdcf13b81402c05f6dafab9a1e1b27212f", size = 335751, upload-time = "2025-02-28T01:22:57.81Z" }, - { url = "https://files.pythonhosted.org/packages/00/1b/b324030c706711c99769988fcb694b3cb23f247ad39a7823a78e361bdbb8/bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2ef6630e0ec01376f59a006dc72918b1bf436c3b571b80fa1968d775fa02fe7d", size = 355965, upload-time = "2025-02-28T01:22:59.181Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/dd/20372a0579dd915dfc3b1cd4943b3bca431866fcb1dfdfd7518c3caddea6/bcrypt-4.3.0-cp313-cp313t-win32.whl", hash = "sha256:7a4be4cbf241afee43f1c3969b9103a41b40bcb3a3f467ab19f891d9bc4642e4", size = 155316, upload-time = "2025-02-28T01:23:00.763Z" }, - { url = "https://files.pythonhosted.org/packages/6d/52/45d969fcff6b5577c2bf17098dc36269b4c02197d551371c023130c0f890/bcrypt-4.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c1949bf259a388863ced887c7861da1df681cb2388645766c89fdfd9004c669", size = 147752, upload-time = "2025-02-28T01:23:02.908Z" }, - { url = "https://files.pythonhosted.org/packages/11/22/5ada0b9af72b60cbc4c9a399fdde4af0feaa609d27eb0adc61607997a3fa/bcrypt-4.3.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:f81b0ed2639568bf14749112298f9e4e2b28853dab50a8b357e31798686a036d", size = 498019, upload-time = "2025-02-28T01:23:05.838Z" }, - { url = "https://files.pythonhosted.org/packages/b8/8c/252a1edc598dc1ce57905be173328eda073083826955ee3c97c7ff5ba584/bcrypt-4.3.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:864f8f19adbe13b7de11ba15d85d4a428c7e2f344bac110f667676a0ff84924b", size = 279174, upload-time = "2025-02-28T01:23:07.274Z" }, - { url = "https://files.pythonhosted.org/packages/29/5b/4547d5c49b85f0337c13929f2ccbe08b7283069eea3550a457914fc078aa/bcrypt-4.3.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e36506d001e93bffe59754397572f21bb5dc7c83f54454c990c74a468cd589e", size = 283870, upload-time = "2025-02-28T01:23:09.151Z" }, - { url = "https://files.pythonhosted.org/packages/be/21/7dbaf3fa1745cb63f776bb046e481fbababd7d344c5324eab47f5ca92dd2/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:842d08d75d9fe9fb94b18b071090220697f9f184d4547179b60734846461ed59", size = 279601, upload-time = "2025-02-28T01:23:11.461Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/64/e042fc8262e971347d9230d9abbe70d68b0a549acd8611c83cebd3eaec67/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7c03296b85cb87db865d91da79bf63d5609284fc0cab9472fdd8367bbd830753", size = 297660, upload-time = "2025-02-28T01:23:12.989Z" }, - { url = "https://files.pythonhosted.org/packages/50/b8/6294eb84a3fef3b67c69b4470fcdd5326676806bf2519cda79331ab3c3a9/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:62f26585e8b219cdc909b6a0069efc5e4267e25d4a3770a364ac58024f62a761", size = 284083, upload-time = "2025-02-28T01:23:14.5Z" }, - { url = "https://files.pythonhosted.org/packages/62/e6/baff635a4f2c42e8788fe1b1633911c38551ecca9a749d1052d296329da6/bcrypt-4.3.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:beeefe437218a65322fbd0069eb437e7c98137e08f22c4660ac2dc795c31f8bb", size = 279237, upload-time = "2025-02-28T01:23:16.686Z" }, - { url = "https://files.pythonhosted.org/packages/39/48/46f623f1b0c7dc2e5de0b8af5e6f5ac4cc26408ac33f3d424e5ad8da4a90/bcrypt-4.3.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:97eea7408db3a5bcce4a55d13245ab3fa566e23b4c67cd227062bb49e26c585d", size = 283737, upload-time = "2025-02-28T01:23:18.897Z" }, - { url = "https://files.pythonhosted.org/packages/49/8b/70671c3ce9c0fca4a6cc3cc6ccbaa7e948875a2e62cbd146e04a4011899c/bcrypt-4.3.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:191354ebfe305e84f344c5964c7cd5f924a3bfc5d405c75ad07f232b6dffb49f", size = 312741, upload-time = "2025-02-28T01:23:21.041Z" }, - { url = "https://files.pythonhosted.org/packages/27/fb/910d3a1caa2d249b6040a5caf9f9866c52114d51523ac2fb47578a27faee/bcrypt-4.3.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:41261d64150858eeb5ff43c753c4b216991e0ae16614a308a15d909503617732", size = 316472, upload-time = "2025-02-28T01:23:23.183Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/cf/7cf3a05b66ce466cfb575dbbda39718d45a609daa78500f57fa9f36fa3c0/bcrypt-4.3.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:33752b1ba962ee793fa2b6321404bf20011fe45b9afd2a842139de3011898fef", size = 343606, upload-time = "2025-02-28T01:23:25.361Z" }, - { url = "https://files.pythonhosted.org/packages/e3/b8/e970ecc6d7e355c0d892b7f733480f4aa8509f99b33e71550242cf0b7e63/bcrypt-4.3.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:50e6e80a4bfd23a25f5c05b90167c19030cf9f87930f7cb2eacb99f45d1c3304", size = 362867, upload-time = "2025-02-28T01:23:26.875Z" }, - { url = "https://files.pythonhosted.org/packages/a9/97/8d3118efd8354c555a3422d544163f40d9f236be5b96c714086463f11699/bcrypt-4.3.0-cp38-abi3-win32.whl", hash = "sha256:67a561c4d9fb9465ec866177e7aebcad08fe23aaf6fbd692a6fab69088abfc51", size = 160589, upload-time = "2025-02-28T01:23:28.381Z" }, - { url = "https://files.pythonhosted.org/packages/29/07/416f0b99f7f3997c69815365babbc2e8754181a4b1899d921b3c7d5b6f12/bcrypt-4.3.0-cp38-abi3-win_amd64.whl", hash = "sha256:584027857bc2843772114717a7490a37f68da563b3620f78a849bcb54dc11e62", size = 152794, upload-time = "2025-02-28T01:23:30.187Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c1/3fa0e9e4e0bfd3fd77eb8b52ec198fd6e1fd7e9402052e43f23483f956dd/bcrypt-4.3.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0d3efb1157edebfd9128e4e46e2ac1a64e0c1fe46fb023158a407c7892b0f8c3", size = 498969, upload-time = "2025-02-28T01:23:31.945Z" }, - { url = "https://files.pythonhosted.org/packages/ce/d4/755ce19b6743394787fbd7dff6bf271b27ee9b5912a97242e3caf125885b/bcrypt-4.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08bacc884fd302b611226c01014eca277d48f0a05187666bca23aac0dad6fe24", size = 279158, upload-time = "2025-02-28T01:23:34.161Z" }, - { url = 
"https://files.pythonhosted.org/packages/9b/5d/805ef1a749c965c46b28285dfb5cd272a7ed9fa971f970435a5133250182/bcrypt-4.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6746e6fec103fcd509b96bacdfdaa2fbde9a553245dbada284435173a6f1aef", size = 284285, upload-time = "2025-02-28T01:23:35.765Z" }, - { url = "https://files.pythonhosted.org/packages/ab/2b/698580547a4a4988e415721b71eb45e80c879f0fb04a62da131f45987b96/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:afe327968aaf13fc143a56a3360cb27d4ad0345e34da12c7290f1b00b8fe9a8b", size = 279583, upload-time = "2025-02-28T01:23:38.021Z" }, - { url = "https://files.pythonhosted.org/packages/f2/87/62e1e426418204db520f955ffd06f1efd389feca893dad7095bf35612eec/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d9af79d322e735b1fc33404b5765108ae0ff232d4b54666d46730f8ac1a43676", size = 297896, upload-time = "2025-02-28T01:23:39.575Z" }, - { url = "https://files.pythonhosted.org/packages/cb/c6/8fedca4c2ada1b6e889c52d2943b2f968d3427e5d65f595620ec4c06fa2f/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f1e3ffa1365e8702dc48c8b360fef8d7afeca482809c5e45e653af82ccd088c1", size = 284492, upload-time = "2025-02-28T01:23:40.901Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4d/c43332dcaaddb7710a8ff5269fcccba97ed3c85987ddaa808db084267b9a/bcrypt-4.3.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3004df1b323d10021fda07a813fd33e0fd57bef0e9a480bb143877f6cba996fe", size = 279213, upload-time = "2025-02-28T01:23:42.653Z" }, - { url = "https://files.pythonhosted.org/packages/dc/7f/1e36379e169a7df3a14a1c160a49b7b918600a6008de43ff20d479e6f4b5/bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:531457e5c839d8caea9b589a1bcfe3756b0547d7814e9ce3d437f17da75c32b0", size = 284162, upload-time = "2025-02-28T01:23:43.964Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/0a/644b2731194b0d7646f3210dc4d80c7fee3ecb3a1f791a6e0ae6bb8684e3/bcrypt-4.3.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:17a854d9a7a476a89dcef6c8bd119ad23e0f82557afbd2c442777a16408e614f", size = 312856, upload-time = "2025-02-28T01:23:46.011Z" }, - { url = "https://files.pythonhosted.org/packages/dc/62/2a871837c0bb6ab0c9a88bf54de0fc021a6a08832d4ea313ed92a669d437/bcrypt-4.3.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6fb1fd3ab08c0cbc6826a2e0447610c6f09e983a281b919ed721ad32236b8b23", size = 316726, upload-time = "2025-02-28T01:23:47.575Z" }, - { url = "https://files.pythonhosted.org/packages/0c/a1/9898ea3faac0b156d457fd73a3cb9c2855c6fd063e44b8522925cdd8ce46/bcrypt-4.3.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e965a9c1e9a393b8005031ff52583cedc15b7884fce7deb8b0346388837d6cfe", size = 343664, upload-time = "2025-02-28T01:23:49.059Z" }, - { url = "https://files.pythonhosted.org/packages/40/f2/71b4ed65ce38982ecdda0ff20c3ad1b15e71949c78b2c053df53629ce940/bcrypt-4.3.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:79e70b8342a33b52b55d93b3a59223a844962bef479f6a0ea318ebbcadf71505", size = 363128, upload-time = "2025-02-28T01:23:50.399Z" }, - { url = "https://files.pythonhosted.org/packages/11/99/12f6a58eca6dea4be992d6c681b7ec9410a1d9f5cf368c61437e31daa879/bcrypt-4.3.0-cp39-abi3-win32.whl", hash = "sha256:b4d4e57f0a63fd0b358eb765063ff661328f69a04494427265950c71b992a39a", size = 160598, upload-time = "2025-02-28T01:23:51.775Z" }, - { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = 
"sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001, upload-time = "2022-10-05T19:19:32.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] [[package]] @@ -316,6 +406,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" }, ] +[[package]] +name = "bitsandbytes" +version = "0.45.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/b7/cb5ce4d1a382cf53c19ef06c5fc29e85f5e129b4da6527dd207d90a5b8ad/bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:a5453f30cc6aab6ccaac364e6bf51a7808d3da5f71763dffeb6d9694c59136e4", size = 76059261, upload-time = "2025-04-07T13:32:52.573Z" }, + { url = "https://files.pythonhosted.org/packages/a6/4c/77b535e025ce780d2ada8271c1e481fb7337c1df2588a52fe1c9bd87d2e8/bitsandbytes-0.45.5-py3-none-win_amd64.whl", hash = "sha256:ed1c61b91d989d6a33fd05737d6edbf5086d8ebc89235ee632c7a19144085da2", size = 75430204, upload-time = "2025-04-07T13:32:57.553Z" }, +] + [[package]] name = "blake3" version = "1.0.5" @@ -385,12 +488,69 @@ wheels = [ ] [[package]] -name = "braceexpand" 
-version = "0.1.7" +name = "boto3" +version = "1.40.38" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/c7/1442380ad7e211089a3c94b758ffb01079eab0183700fba9d5be417b5cb4/boto3-1.40.38.tar.gz", hash = "sha256:932ebdd8dbf8ab5694d233df86d5d0950291e0b146c27cb46da8adb4f00f6ca4", size = 111559, upload-time = "2025-09-24T19:23:25.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/a9/e7e5fe3fec60fb87bc9f8b3874c4c606e290a64b2ae8c157e08c3e69d755/boto3-1.40.38-py3-none-any.whl", hash = "sha256:fac337b4f0615e4d6ceee44686e662f51d8e57916ed2bc763468e3e8c611a658", size = 139345, upload-time = "2025-09-24T19:23:23.756Z" }, +] + +[[package]] +name = "botocore" +version = "1.40.38" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/93/badd4f5ccf25209f3fef2573073da9fe4a45a3da99fca2f800f942130c0f/braceexpand-0.1.7.tar.gz", hash = "sha256:e6e539bd20eaea53547472ff94f4fb5c3d3bf9d0a89388c4b56663aba765f705", size = 7777, upload-time = "2021-05-07T13:49:07.323Z" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d7/11/82a216e24f1af1ba5c3c358201fb9eba5e502242f504dd1f42eb18cbf2c5/botocore-1.40.38.tar.gz", hash = "sha256:18039009e1eca2bff12e576e8dd3c80cd9b312294f1469c831de03169582ad59", size = 14354395, upload-time = "2025-09-24T19:23:14.522Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/93/e8c04e80e82391a6e51f218ca49720f64236bc824e92152a2633b74cf7ab/braceexpand-0.1.7-py2.py3-none-any.whl", hash = "sha256:91332d53de7828103dcae5773fb43bc34950b0c8160e35e0f44c4427a3b85014", size = 5923, upload-time = "2021-05-07T13:49:05.146Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/f0/ca5a00dd8fe3768ecff54756457dd0c69ed8e1cd09d0f7c21599477b5d5b/botocore-1.40.38-py3-none-any.whl", hash = "sha256:7d60a7557db3a58f9394e7ecec1f6b87495ce947eb713f29d53aee83a6e9dc71", size = 14025193, upload-time = "2025-09-24T19:23:11.093Z" }, +] + +[[package]] +name = "brotli" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2f/c2/f9e977608bdf958650638c3f1e28f85a1b075f075ebbe77db8555463787b/Brotli-1.1.0.tar.gz", hash = "sha256:81de08ac11bcb85841e440c13611c00b67d3bf82698314928d0b676362546724", size = 7372270, upload-time = "2023-09-07T14:05:41.643Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5c/d0/5373ae13b93fe00095a58efcbce837fd470ca39f703a235d2a999baadfbc/Brotli-1.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:32d95b80260d79926f5fab3c41701dbb818fde1c9da590e77e571eefd14abe28", size = 815693, upload-time = "2024-10-18T12:32:23.824Z" }, + { url = "https://files.pythonhosted.org/packages/8e/48/f6e1cdf86751300c288c1459724bfa6917a80e30dbfc326f92cea5d3683a/Brotli-1.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b760c65308ff1e462f65d69c12e4ae085cff3b332d894637f6273a12a482d09f", size = 422489, upload-time = "2024-10-18T12:32:25.641Z" }, + { url = "https://files.pythonhosted.org/packages/06/88/564958cedce636d0f1bed313381dfc4b4e3d3f6015a63dae6146e1b8c65c/Brotli-1.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:316cc9b17edf613ac76b1f1f305d2a748f1b976b033b049a6ecdfd5612c70409", size = 873081, upload-time = "2023-09-07T14:03:57.967Z" }, + { url = "https://files.pythonhosted.org/packages/58/79/b7026a8bb65da9a6bb7d14329fd2bd48d2b7f86d7329d5cc8ddc6a90526f/Brotli-1.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:caf9ee9a5775f3111642d33b86237b05808dafcd6268faa492250e9b78046eb2", size = 446244, upload-time = "2023-09-07T14:03:59.319Z" }, + { url = 
"https://files.pythonhosted.org/packages/e5/18/c18c32ecea41b6c0004e15606e274006366fe19436b6adccc1ae7b2e50c2/Brotli-1.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70051525001750221daa10907c77830bc889cb6d865cc0b813d9db7fefc21451", size = 2906505, upload-time = "2023-09-07T14:04:01.327Z" }, + { url = "https://files.pythonhosted.org/packages/08/c8/69ec0496b1ada7569b62d85893d928e865df29b90736558d6c98c2031208/Brotli-1.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7f4bf76817c14aa98cc6697ac02f3972cb8c3da93e9ef16b9c66573a68014f91", size = 2944152, upload-time = "2023-09-07T14:04:03.033Z" }, + { url = "https://files.pythonhosted.org/packages/ab/fb/0517cea182219d6768113a38167ef6d4eb157a033178cc938033a552ed6d/Brotli-1.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0c5516f0aed654134a2fc936325cc2e642f8a0e096d075209672eb321cff408", size = 2919252, upload-time = "2023-09-07T14:04:04.675Z" }, + { url = "https://files.pythonhosted.org/packages/c7/53/73a3431662e33ae61a5c80b1b9d2d18f58dfa910ae8dd696e57d39f1a2f5/Brotli-1.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6c3020404e0b5eefd7c9485ccf8393cfb75ec38ce75586e046573c9dc29967a0", size = 2845955, upload-time = "2023-09-07T14:04:06.585Z" }, + { url = "https://files.pythonhosted.org/packages/55/ac/bd280708d9c5ebdbf9de01459e625a3e3803cce0784f47d633562cf40e83/Brotli-1.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4ed11165dd45ce798d99a136808a794a748d5dc38511303239d4e2363c0695dc", size = 2914304, upload-time = "2023-09-07T14:04:08.668Z" }, + { url = "https://files.pythonhosted.org/packages/76/58/5c391b41ecfc4527d2cc3350719b02e87cb424ef8ba2023fb662f9bf743c/Brotli-1.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:4093c631e96fdd49e0377a9c167bfd75b6d0bad2ace734c6eb20b348bc3ea180", size = 2814452, upload-time = "2023-09-07T14:04:10.736Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/4e/91b8256dfe99c407f174924b65a01f5305e303f486cc7a2e8a5d43c8bec3/Brotli-1.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7e4c4629ddad63006efa0ef968c8e4751c5868ff0b1c5c40f76524e894c50248", size = 2938751, upload-time = "2023-09-07T14:04:12.875Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a6/e2a39a5d3b412938362bbbeba5af904092bf3f95b867b4a3eb856104074e/Brotli-1.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:861bf317735688269936f755fa136a99d1ed526883859f86e41a5d43c61d8966", size = 2933757, upload-time = "2023-09-07T14:04:14.551Z" }, + { url = "https://files.pythonhosted.org/packages/13/f0/358354786280a509482e0e77c1a5459e439766597d280f28cb097642fc26/Brotli-1.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:87a3044c3a35055527ac75e419dfa9f4f3667a1e887ee80360589eb8c90aabb9", size = 2936146, upload-time = "2024-10-18T12:32:27.257Z" }, + { url = "https://files.pythonhosted.org/packages/80/f7/daf538c1060d3a88266b80ecc1d1c98b79553b3f117a485653f17070ea2a/Brotli-1.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c5529b34c1c9d937168297f2c1fde7ebe9ebdd5e121297ff9c043bdb2ae3d6fb", size = 2848055, upload-time = "2024-10-18T12:32:29.376Z" }, + { url = "https://files.pythonhosted.org/packages/ad/cf/0eaa0585c4077d3c2d1edf322d8e97aabf317941d3a72d7b3ad8bce004b0/Brotli-1.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ca63e1890ede90b2e4454f9a65135a4d387a4585ff8282bb72964fab893f2111", size = 3035102, upload-time = "2024-10-18T12:32:31.371Z" }, + { url = "https://files.pythonhosted.org/packages/d8/63/1c1585b2aa554fe6dbce30f0c18bdbc877fa9a1bf5ff17677d9cca0ac122/Brotli-1.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e79e6520141d792237c70bcd7a3b122d00f2613769ae0cb61c52e89fd3443839", size = 2930029, upload-time = "2024-10-18T12:32:33.293Z" }, + { url = 
"https://files.pythonhosted.org/packages/5f/3b/4e3fd1893eb3bbfef8e5a80d4508bec17a57bb92d586c85c12d28666bb13/Brotli-1.1.0-cp312-cp312-win32.whl", hash = "sha256:5f4d5ea15c9382135076d2fb28dde923352fe02951e66935a9efaac8f10e81b0", size = 333276, upload-time = "2023-09-07T14:04:16.49Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d5/942051b45a9e883b5b6e98c041698b1eb2012d25e5948c58d6bf85b1bb43/Brotli-1.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:906bc3a79de8c4ae5b86d3d75a8b77e44404b0f4261714306e3ad248d8ab0951", size = 357255, upload-time = "2023-09-07T14:04:17.83Z" }, + { url = "https://files.pythonhosted.org/packages/0a/9f/fb37bb8ffc52a8da37b1c03c459a8cd55df7a57bdccd8831d500e994a0ca/Brotli-1.1.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8bf32b98b75c13ec7cf774164172683d6e7891088f6316e54425fde1efc276d5", size = 815681, upload-time = "2024-10-18T12:32:34.942Z" }, + { url = "https://files.pythonhosted.org/packages/06/b3/dbd332a988586fefb0aa49c779f59f47cae76855c2d00f450364bb574cac/Brotli-1.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bc37c4d6b87fb1017ea28c9508b36bbcb0c3d18b4260fcdf08b200c74a6aee8", size = 422475, upload-time = "2024-10-18T12:32:36.485Z" }, + { url = "https://files.pythonhosted.org/packages/bb/80/6aaddc2f63dbcf2d93c2d204e49c11a9ec93a8c7c63261e2b4bd35198283/Brotli-1.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c0ef38c7a7014ffac184db9e04debe495d317cc9c6fb10071f7fefd93100a4f", size = 2906173, upload-time = "2024-10-18T12:32:37.978Z" }, + { url = "https://files.pythonhosted.org/packages/ea/1d/e6ca79c96ff5b641df6097d299347507d39a9604bde8915e76bf026d6c77/Brotli-1.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91d7cc2a76b5567591d12c01f019dd7afce6ba8cba6571187e21e2fc418ae648", size = 2943803, upload-time = "2024-10-18T12:32:39.606Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/a3/d98d2472e0130b7dd3acdbb7f390d478123dbf62b7d32bda5c830a96116d/Brotli-1.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a93dde851926f4f2678e704fadeb39e16c35d8baebd5252c9fd94ce8ce68c4a0", size = 2918946, upload-time = "2024-10-18T12:32:41.679Z" }, + { url = "https://files.pythonhosted.org/packages/c4/a5/c69e6d272aee3e1423ed005d8915a7eaa0384c7de503da987f2d224d0721/Brotli-1.1.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0db75f47be8b8abc8d9e31bc7aad0547ca26f24a54e6fd10231d623f183d089", size = 2845707, upload-time = "2024-10-18T12:32:43.478Z" }, + { url = "https://files.pythonhosted.org/packages/58/9f/4149d38b52725afa39067350696c09526de0125ebfbaab5acc5af28b42ea/Brotli-1.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6967ced6730aed543b8673008b5a391c3b1076d834ca438bbd70635c73775368", size = 2936231, upload-time = "2024-10-18T12:32:45.224Z" }, + { url = "https://files.pythonhosted.org/packages/5a/5a/145de884285611838a16bebfdb060c231c52b8f84dfbe52b852a15780386/Brotli-1.1.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:7eedaa5d036d9336c95915035fb57422054014ebdeb6f3b42eac809928e40d0c", size = 2848157, upload-time = "2024-10-18T12:32:46.894Z" }, + { url = "https://files.pythonhosted.org/packages/50/ae/408b6bfb8525dadebd3b3dd5b19d631da4f7d46420321db44cd99dcf2f2c/Brotli-1.1.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d487f5432bf35b60ed625d7e1b448e2dc855422e87469e3f450aa5552b0eb284", size = 3035122, upload-time = "2024-10-18T12:32:48.844Z" }, + { url = "https://files.pythonhosted.org/packages/af/85/a94e5cfaa0ca449d8f91c3d6f78313ebf919a0dbd55a100c711c6e9655bc/Brotli-1.1.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:832436e59afb93e1836081a20f324cb185836c617659b07b129141a8426973c7", size = 2930206, upload-time = "2024-10-18T12:32:51.198Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/f0/a61d9262cd01351df22e57ad7c34f66794709acab13f34be2675f45bf89d/Brotli-1.1.0-cp313-cp313-win32.whl", hash = "sha256:43395e90523f9c23a3d5bdf004733246fba087f2948f87ab28015f12359ca6a0", size = 333804, upload-time = "2024-10-18T12:32:52.661Z" }, + { url = "https://files.pythonhosted.org/packages/7e/c1/ec214e9c94000d1c1974ec67ced1c970c148aa6b8d8373066123fc3dbf06/Brotli-1.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:9011560a466d2eb3f5a6e4929cf4a09be405c64154e12df0dd72713f6500e32b", size = 358517, upload-time = "2024-10-18T12:32:54.066Z" }, ] [[package]] @@ -403,54 +563,113 @@ wheels = [ ] [[package]] -name = "catalogue" -version = "2.0.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561, upload-time = "2023-09-25T06:29:24.962Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = "sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325, upload-time = "2023-09-25T06:29:23.337Z" }, +name = "causal-conv1d" +version = "1.5.0.post8" +source = { git = "https://github.com/Dao-AILab/causal-conv1d?tag=v1.5.0.post8#82867a9d2e6907cc0f637ac6aff318f696838548" } +dependencies = [ + { name = "ninja" }, + { name = "packaging" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] + +[[package]] +name = "cbor2" +version = "5.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/3a/89/01df16cdc9c60c07956756c90fe92c684021003079e358a78e213bce45a2/cbor2-5.7.0.tar.gz", hash = "sha256:3f6d843f4db4d0ec501c46453c22a4fbebb1abfb5b740e1bcab34c615cd7406b", size = 102374, upload-time = "2025-08-14T08:59:47.294Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b1/b1/d54c41b1bc71b8dea0bad3409d2a497df35f7b5ae5db70c1cc9ebc8d556d/cbor2-5.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7ad36f0537b75c1aa2c7a462cbdbeec5e8ba02802ea985e0b9fe5deee3b946f4", size = 69020, upload-time = "2025-08-14T08:59:02.276Z" }, + { url = "https://files.pythonhosted.org/packages/f4/e0/45368d5d78b520caaa9ca5a09f55365bc9933d43bce978a528922654ca9f/cbor2-5.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5fc9b335cf28e63d9eed4ae03d1e8f90f1a6b287cabc8d29bfddf73fa70643e9", size = 68950, upload-time = "2025-08-14T08:59:03.882Z" }, + { url = "https://files.pythonhosted.org/packages/1e/6a/9aed5b716407c1d48425ba55c6022a01a9abdbf58a691f50416461fa371d/cbor2-5.7.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16bea83598a1eeedbd50c2e9fdf3685bae78ca9d9ec8cd8010777db14a315578", size = 285685, upload-time = "2025-08-14T08:59:05.165Z" }, + { url = "https://files.pythonhosted.org/packages/a8/6e/3499eaa2b858c7695a447b6311303f06ffc90fc2c45851337121661f1f5c/cbor2-5.7.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8e837825a16e60ace6e284095aa9fbe504bf87a8f4494bf7d95931e37fb01a70", size = 284948, upload-time = "2025-08-14T08:59:06.64Z" }, + { url = "https://files.pythonhosted.org/packages/d1/3e/ae67866ef65717665e0acf2873d466c5d4a1d965b0d0348f2269b73f28fb/cbor2-5.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:27396c5e275ff7c7cd87fe8aaadf781e6194903921f250934af7c86d5efec82e", size = 276375, upload-time = "2025-08-14T08:59:08.845Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/3d/2f8e9671111661dd571de206344ecd7706f6d458aab191e06834c89aa58e/cbor2-5.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c84bfef78c4e9c81eb0a10cec340222ba4e39498a63fc2e3d5f982a3f4efa4a7", size = 277680, upload-time = "2025-08-14T08:59:10.292Z" }, + { url = "https://files.pythonhosted.org/packages/85/03/27a9fefa4e084c1129d7180727791a166629fdae39e0609508401d322626/cbor2-5.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:f64270a24aaadb15dd31cbd64a98d99fca8e0398a65b1570ba07f3c259eb5516", size = 68354, upload-time = "2025-08-14T08:59:11.561Z" }, + { url = "https://files.pythonhosted.org/packages/25/d9/b856d078696542a0d7486d1ece5c936e937bebe5b114674db18d76feb131/cbor2-5.7.0-cp312-cp312-win_arm64.whl", hash = "sha256:73ef321d7b580f08c9fadc41c3d2a218aa3f01e163be9793c6969aadee07f57a", size = 63896, upload-time = "2025-08-14T08:59:12.977Z" }, + { url = "https://files.pythonhosted.org/packages/5c/2f/25da2b08f7a3d7b3f72e678a373092619821ab706f3f720d29e567a426df/cbor2-5.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7654e77b7f6be029fb37a074b175483a4a8ae3fe5e2a91008926625aa91aef2c", size = 69046, upload-time = "2025-08-14T08:59:14.123Z" }, + { url = "https://files.pythonhosted.org/packages/4b/b5/d324166a5a1feed61aeb32fed70182306796b67cedaf65c91671c8674ea2/cbor2-5.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9bd76624b090faa6900739025d798a4e3130da80dbae15391b42b3d4672a4022", size = 69061, upload-time = "2025-08-14T08:59:15.228Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f9/180e953da537602d8530910f5a5f76c3d7215829d145d93f97fa43324dd7/cbor2-5.7.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:428d58b54a7b32ede869e79c294d686f826dcfdab9de7f92135dd3ce12e313b8", size = 284642, upload-time = "2025-08-14T08:59:16.511Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/eb/7d79831a5081d25002e36a1b2685210ae8783582d1a99fae350b2b1b899c/cbor2-5.7.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a91b6912e2ff64f33464f67ec6528cf2e26c06a5f3cc3fb1954f94aa58d68670", size = 283690, upload-time = "2025-08-14T08:59:17.989Z" }, + { url = "https://files.pythonhosted.org/packages/38/43/1403610711ea6b9b957d86bd15fd0585a3917a3d9f8bafbb2cb1ad016361/cbor2-5.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9faeec4525fe3103a71f0fd3d6fe9a49ea6ff4ade8cb7cf1c395001b906a01e5", size = 276305, upload-time = "2025-08-14T08:59:19.43Z" }, + { url = "https://files.pythonhosted.org/packages/77/06/df4a5c7c16df3b604bd560234aff686da443bf70a124c5e3f80dff954e5a/cbor2-5.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:361315ccd8022c44bb501877fd9b236479c975f1a7aed69c8541bd609c0a8908", size = 277416, upload-time = "2025-08-14T08:59:20.798Z" }, + { url = "https://files.pythonhosted.org/packages/84/aa/62288bac4e501e25d04d50bb79ac46d4a6678ff9545941436a702c654eba/cbor2-5.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:293c4a5d6a9a69fcecf595a47430dc3b11f4a3355089b1fe300d0ac48c5776c5", size = 68378, upload-time = "2025-08-14T08:59:22.227Z" }, + { url = "https://files.pythonhosted.org/packages/b6/d6/8358c144767731ffa03c16bb1222b59cb3be632833c70a2132cbe2ed8300/cbor2-5.7.0-cp313-cp313-win_arm64.whl", hash = "sha256:52d6e1a9b2f4475540063d7b966b1b2e93ac497e08ab9a1514fd6330f8db5b4c", size = 63966, upload-time = "2025-08-14T08:59:23.369Z" }, + { url = "https://files.pythonhosted.org/packages/99/32/b653a2a3cfb283bdf0539dbd79d3bafa528aaa26fbe44796897d167e733d/cbor2-5.7.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:f4f0464425ff809b1dd737db8c65a937516aba5eb3794cb1433f7eb8eb7a6535", size = 68993, upload-time = "2025-08-14T08:59:24.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/90/79d38f7f645a33e44b87f9333f74c04d01006a11f5291d2e8686815fe731/cbor2-5.7.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:866d993ebc9c4e4018ab001503dafc4145bb6ec91e1eddf12b8d7b6898021201", size = 69248, upload-time = "2025-08-14T08:59:25.63Z" }, + { url = "https://files.pythonhosted.org/packages/46/ca/59d65f12ef14c54c564f0e4363d9dd049a90d5b0e2a0dab0183062268a36/cbor2-5.7.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc7a11433ea1c45b6d47484bef99e822fd8a40b4cfbcdc1e00378a7e8704e317", size = 283739, upload-time = "2025-08-14T08:59:26.856Z" }, + { url = "https://files.pythonhosted.org/packages/19/51/5da8661b1aa7a4b7afe06724994b23eca6f7912d2cca705721dbd4aa764a/cbor2-5.7.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e33242570cb4542302dcb6cf429cc9abe315ff7ebb370de2828eed22a8b00fe8", size = 281246, upload-time = "2025-08-14T08:59:28.261Z" }, + { url = "https://files.pythonhosted.org/packages/d4/2f/565f5f215a9d4211c23e94c5b1761d697d248603ae11ecf83a9a70e99382/cbor2-5.7.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:855fe80517071028a5804a29b607864b8d90bbb2223331ab2d8cae94b979d61f", size = 275442, upload-time = "2025-08-14T08:59:29.794Z" }, + { url = "https://files.pythonhosted.org/packages/84/11/307a558f6ddc3bd0fc539ac65696acb0253554c88bab5da7d459706eb20e/cbor2-5.7.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:87170260845c2ea3d74288f667e0bc81c8a6bbc72ff60265d19c59b3e76be266", size = 275372, upload-time = "2025-08-14T08:59:31.589Z" }, + { url = "https://files.pythonhosted.org/packages/92/f0/960b7050a53b8d60f92e6e4c1ce670f9c50ab2ff48468e83b2bef0399b38/cbor2-5.7.0-cp314-cp314-win_amd64.whl", hash = "sha256:a2b591904555e51843c95776df2d6b161226af045e655f464c101d8ad8708e99", size = 70188, upload-time = "2025-08-14T08:59:32.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/a7/83/51805084b6208529f82e5a52261468a56b758728153ee2400c421fa845f4/cbor2-5.7.0-cp314-cp314-win_arm64.whl", hash = "sha256:4460164ffd0ceaf8cc3f5597e73dd99fd781541c7bba0ea64ac93043bf08bb6a", size = 66162, upload-time = "2025-08-14T08:59:34.35Z" }, + { url = "https://files.pythonhosted.org/packages/41/cc/0ce73676d2a0c9e5a9330c301940c50eb325dacf5f6d9690fd43a8817fe9/cbor2-5.7.0-py3-none-any.whl", hash = "sha256:a871e7a6f7cba1ddb02503ea974f15f6524c95078fbfe0b860fd4193d7c8f27a", size = 23828, upload-time = "2025-08-14T08:59:46.129Z" }, ] [[package]] name = "certifi" -version = "2025.6.15" +version = "2025.8.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" } +sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386, upload-time = "2025-08-03T03:07:47.08Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" }, + { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216, upload-time = "2025-08-03T03:07:45.777Z" }, ] [[package]] name = "cffi" -version = "1.17.1" -source = { registry = "https://pypi.org/simple" } -dependencies 
= [ - { name = "pycparser" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/84/e94227139ee5fb4d600a7a4927f322e1d4aea6fdc50bd3fca8493caba23f/cffi-1.17.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", size = 183178, upload-time = "2024-09-04T20:44:12.232Z" }, - { url = "https://files.pythonhosted.org/packages/da/ee/fb72c2b48656111c4ef27f0f91da355e130a923473bf5ee75c5643d00cca/cffi-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", size = 178840, upload-time = "2024-09-04T20:44:13.739Z" }, - { url = "https://files.pythonhosted.org/packages/cc/b6/db007700f67d151abadf508cbfd6a1884f57eab90b1bb985c4c8c02b0f28/cffi-1.17.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", size = 454803, upload-time = "2024-09-04T20:44:15.231Z" }, - { url = "https://files.pythonhosted.org/packages/1a/df/f8d151540d8c200eb1c6fba8cd0dfd40904f1b0682ea705c36e6c2e97ab3/cffi-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", size = 478850, upload-time = "2024-09-04T20:44:17.188Z" }, - { url = "https://files.pythonhosted.org/packages/28/c0/b31116332a547fd2677ae5b78a2ef662dfc8023d67f41b2a83f7c2aa78b1/cffi-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", size = 485729, upload-time = "2024-09-04T20:44:18.688Z" }, - { url = 
"https://files.pythonhosted.org/packages/91/2b/9a1ddfa5c7f13cab007a2c9cc295b70fbbda7cb10a286aa6810338e60ea1/cffi-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", size = 471256, upload-time = "2024-09-04T20:44:20.248Z" }, - { url = "https://files.pythonhosted.org/packages/b2/d5/da47df7004cb17e4955df6a43d14b3b4ae77737dff8bf7f8f333196717bf/cffi-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", size = 479424, upload-time = "2024-09-04T20:44:21.673Z" }, - { url = "https://files.pythonhosted.org/packages/0b/ac/2a28bcf513e93a219c8a4e8e125534f4f6db03e3179ba1c45e949b76212c/cffi-1.17.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", size = 484568, upload-time = "2024-09-04T20:44:23.245Z" }, - { url = "https://files.pythonhosted.org/packages/d4/38/ca8a4f639065f14ae0f1d9751e70447a261f1a30fa7547a828ae08142465/cffi-1.17.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", size = 488736, upload-time = "2024-09-04T20:44:24.757Z" }, - { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, - { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, - { url = 
"https://files.pythonhosted.org/packages/8d/f8/dd6c246b148639254dad4d6803eb6a54e8c85c6e11ec9df2cffa87571dbe/cffi-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", size = 182989, upload-time = "2024-09-04T20:44:28.956Z" }, - { url = "https://files.pythonhosted.org/packages/8b/f1/672d303ddf17c24fc83afd712316fda78dc6fce1cd53011b839483e1ecc8/cffi-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", size = 178802, upload-time = "2024-09-04T20:44:30.289Z" }, - { url = "https://files.pythonhosted.org/packages/0e/2d/eab2e858a91fdff70533cab61dcff4a1f55ec60425832ddfdc9cd36bc8af/cffi-1.17.1-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", size = 454792, upload-time = "2024-09-04T20:44:32.01Z" }, - { url = "https://files.pythonhosted.org/packages/75/b2/fbaec7c4455c604e29388d55599b99ebcc250a60050610fadde58932b7ee/cffi-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", size = 478893, upload-time = "2024-09-04T20:44:33.606Z" }, - { url = "https://files.pythonhosted.org/packages/4f/b7/6e4a2162178bf1935c336d4da8a9352cccab4d3a5d7914065490f08c0690/cffi-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", size = 485810, upload-time = "2024-09-04T20:44:35.191Z" }, - { url = "https://files.pythonhosted.org/packages/c7/8a/1d0e4a9c26e54746dc08c2c6c037889124d4f59dffd853a659fa545f1b40/cffi-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", size = 471200, upload-time = "2024-09-04T20:44:36.743Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/9f/1aab65a6c0db35f43c4d1b4f580e8df53914310afc10ae0397d29d697af4/cffi-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", size = 479447, upload-time = "2024-09-04T20:44:38.492Z" }, - { url = "https://files.pythonhosted.org/packages/5f/e4/fb8b3dd8dc0e98edf1135ff067ae070bb32ef9d509d6cb0f538cd6f7483f/cffi-1.17.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", size = 484358, upload-time = "2024-09-04T20:44:40.046Z" }, - { url = "https://files.pythonhosted.org/packages/f1/47/d7145bf2dc04684935d57d67dff9d6d795b2ba2796806bb109864be3a151/cffi-1.17.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", size = 488469, upload-time = "2024-09-04T20:44:41.616Z" }, - { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload-time = "2024-09-04T20:44:43.733Z" }, - { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser", marker = "implementation_name != 'PyPy'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/ea/47/4f61023ea636104d4f16ab488e268b93008c3d0bb76893b1b31db1f96802/cffi-2.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d", size = 185271, upload-time = "2025-09-08T23:22:44.795Z" }, + { url = "https://files.pythonhosted.org/packages/df/a2/781b623f57358e360d62cdd7a8c681f074a71d445418a776eef0aadb4ab4/cffi-2.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c", size = 181048, upload-time = "2025-09-08T23:22:45.938Z" }, + { url = "https://files.pythonhosted.org/packages/ff/df/a4f0fbd47331ceeba3d37c2e51e9dfc9722498becbeec2bd8bc856c9538a/cffi-2.0.0-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe", size = 212529, upload-time = "2025-09-08T23:22:47.349Z" }, + { url = "https://files.pythonhosted.org/packages/d5/72/12b5f8d3865bf0f87cf1404d8c374e7487dcf097a1c91c436e72e6badd83/cffi-2.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062", size = 220097, upload-time = "2025-09-08T23:22:48.677Z" }, + { url = "https://files.pythonhosted.org/packages/c2/95/7a135d52a50dfa7c882ab0ac17e8dc11cec9d55d2c18dda414c051c5e69e/cffi-2.0.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e", size = 207983, upload-time = "2025-09-08T23:22:50.06Z" }, + { url = "https://files.pythonhosted.org/packages/3a/c8/15cb9ada8895957ea171c62dc78ff3e99159ee7adb13c0123c001a2546c1/cffi-2.0.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037", size = 206519, upload-time = "2025-09-08T23:22:51.364Z" }, + { url = 
"https://files.pythonhosted.org/packages/78/2d/7fa73dfa841b5ac06c7b8855cfc18622132e365f5b81d02230333ff26e9e/cffi-2.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba", size = 219572, upload-time = "2025-09-08T23:22:52.902Z" }, + { url = "https://files.pythonhosted.org/packages/07/e0/267e57e387b4ca276b90f0434ff88b2c2241ad72b16d31836adddfd6031b/cffi-2.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94", size = 222963, upload-time = "2025-09-08T23:22:54.518Z" }, + { url = "https://files.pythonhosted.org/packages/b6/75/1f2747525e06f53efbd878f4d03bac5b859cbc11c633d0fb81432d98a795/cffi-2.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187", size = 221361, upload-time = "2025-09-08T23:22:55.867Z" }, + { url = "https://files.pythonhosted.org/packages/7b/2b/2b6435f76bfeb6bbf055596976da087377ede68df465419d192acf00c437/cffi-2.0.0-cp312-cp312-win32.whl", hash = "sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18", size = 172932, upload-time = "2025-09-08T23:22:57.188Z" }, + { url = "https://files.pythonhosted.org/packages/f8/ed/13bd4418627013bec4ed6e54283b1959cf6db888048c7cf4b4c3b5b36002/cffi-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5", size = 183557, upload-time = "2025-09-08T23:22:58.351Z" }, + { url = "https://files.pythonhosted.org/packages/95/31/9f7f93ad2f8eff1dbc1c3656d7ca5bfd8fb52c9d786b4dcf19b2d02217fa/cffi-2.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6", size = 177762, upload-time = "2025-09-08T23:22:59.668Z" }, + { url = "https://files.pythonhosted.org/packages/4b/8d/a0a47a0c9e413a658623d014e91e74a50cdd2c423f7ccfd44086ef767f90/cffi-2.0.0-cp313-cp313-macosx_10_13_x86_64.whl", 
hash = "sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb", size = 185230, upload-time = "2025-09-08T23:23:00.879Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d2/a6c0296814556c68ee32009d9c2ad4f85f2707cdecfd7727951ec228005d/cffi-2.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca", size = 181043, upload-time = "2025-09-08T23:23:02.231Z" }, + { url = "https://files.pythonhosted.org/packages/b0/1e/d22cc63332bd59b06481ceaac49d6c507598642e2230f201649058a7e704/cffi-2.0.0-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b", size = 212446, upload-time = "2025-09-08T23:23:03.472Z" }, + { url = "https://files.pythonhosted.org/packages/a9/f5/a2c23eb03b61a0b8747f211eb716446c826ad66818ddc7810cc2cc19b3f2/cffi-2.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b", size = 220101, upload-time = "2025-09-08T23:23:04.792Z" }, + { url = "https://files.pythonhosted.org/packages/f2/7f/e6647792fc5850d634695bc0e6ab4111ae88e89981d35ac269956605feba/cffi-2.0.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2", size = 207948, upload-time = "2025-09-08T23:23:06.127Z" }, + { url = "https://files.pythonhosted.org/packages/cb/1e/a5a1bd6f1fb30f22573f76533de12a00bf274abcdc55c8edab639078abb6/cffi-2.0.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3", size = 206422, upload-time = "2025-09-08T23:23:07.753Z" }, + { url = "https://files.pythonhosted.org/packages/98/df/0a1755e750013a2081e863e7cd37e0cdd02664372c754e5560099eb7aa44/cffi-2.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash 
= "sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26", size = 219499, upload-time = "2025-09-08T23:23:09.648Z" }, + { url = "https://files.pythonhosted.org/packages/50/e1/a969e687fcf9ea58e6e2a928ad5e2dd88cc12f6f0ab477e9971f2309b57c/cffi-2.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c", size = 222928, upload-time = "2025-09-08T23:23:10.928Z" }, + { url = "https://files.pythonhosted.org/packages/36/54/0362578dd2c9e557a28ac77698ed67323ed5b9775ca9d3fe73fe191bb5d8/cffi-2.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b", size = 221302, upload-time = "2025-09-08T23:23:12.42Z" }, + { url = "https://files.pythonhosted.org/packages/eb/6d/bf9bda840d5f1dfdbf0feca87fbdb64a918a69bca42cfa0ba7b137c48cb8/cffi-2.0.0-cp313-cp313-win32.whl", hash = "sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27", size = 172909, upload-time = "2025-09-08T23:23:14.32Z" }, + { url = "https://files.pythonhosted.org/packages/37/18/6519e1ee6f5a1e579e04b9ddb6f1676c17368a7aba48299c3759bbc3c8b3/cffi-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75", size = 183402, upload-time = "2025-09-08T23:23:15.535Z" }, + { url = "https://files.pythonhosted.org/packages/cb/0e/02ceeec9a7d6ee63bb596121c2c8e9b3a9e150936f4fbef6ca1943e6137c/cffi-2.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91", size = 177780, upload-time = "2025-09-08T23:23:16.761Z" }, + { url = "https://files.pythonhosted.org/packages/92/c4/3ce07396253a83250ee98564f8d7e9789fab8e58858f35d07a9a2c78de9f/cffi-2.0.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5", size = 185320, upload-time = "2025-09-08T23:23:18.087Z" }, + { url = 
"https://files.pythonhosted.org/packages/59/dd/27e9fa567a23931c838c6b02d0764611c62290062a6d4e8ff7863daf9730/cffi-2.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13", size = 181487, upload-time = "2025-09-08T23:23:19.622Z" }, + { url = "https://files.pythonhosted.org/packages/d6/43/0e822876f87ea8a4ef95442c3d766a06a51fc5298823f884ef87aaad168c/cffi-2.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b", size = 220049, upload-time = "2025-09-08T23:23:20.853Z" }, + { url = "https://files.pythonhosted.org/packages/b4/89/76799151d9c2d2d1ead63c2429da9ea9d7aac304603de0c6e8764e6e8e70/cffi-2.0.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c", size = 207793, upload-time = "2025-09-08T23:23:22.08Z" }, + { url = "https://files.pythonhosted.org/packages/bb/dd/3465b14bb9e24ee24cb88c9e3730f6de63111fffe513492bf8c808a3547e/cffi-2.0.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef", size = 206300, upload-time = "2025-09-08T23:23:23.314Z" }, + { url = "https://files.pythonhosted.org/packages/47/d9/d83e293854571c877a92da46fdec39158f8d7e68da75bf73581225d28e90/cffi-2.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775", size = 219244, upload-time = "2025-09-08T23:23:24.541Z" }, + { url = "https://files.pythonhosted.org/packages/2b/0f/1f177e3683aead2bb00f7679a16451d302c436b5cbf2505f0ea8146ef59e/cffi-2.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205", size = 222828, upload-time = "2025-09-08T23:23:26.143Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/0f/cafacebd4b040e3119dcb32fed8bdef8dfe94da653155f9d0b9dc660166e/cffi-2.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1", size = 220926, upload-time = "2025-09-08T23:23:27.873Z" }, + { url = "https://files.pythonhosted.org/packages/3e/aa/df335faa45b395396fcbc03de2dfcab242cd61a9900e914fe682a59170b1/cffi-2.0.0-cp314-cp314-win32.whl", hash = "sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f", size = 175328, upload-time = "2025-09-08T23:23:44.61Z" }, + { url = "https://files.pythonhosted.org/packages/bb/92/882c2d30831744296ce713f0feb4c1cd30f346ef747b530b5318715cc367/cffi-2.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25", size = 185650, upload-time = "2025-09-08T23:23:45.848Z" }, + { url = "https://files.pythonhosted.org/packages/9f/2c/98ece204b9d35a7366b5b2c6539c350313ca13932143e79dc133ba757104/cffi-2.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad", size = 180687, upload-time = "2025-09-08T23:23:47.105Z" }, + { url = "https://files.pythonhosted.org/packages/3e/61/c768e4d548bfa607abcda77423448df8c471f25dbe64fb2ef6d555eae006/cffi-2.0.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9", size = 188773, upload-time = "2025-09-08T23:23:29.347Z" }, + { url = "https://files.pythonhosted.org/packages/2c/ea/5f76bce7cf6fcd0ab1a1058b5af899bfbef198bea4d5686da88471ea0336/cffi-2.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d", size = 185013, upload-time = "2025-09-08T23:23:30.63Z" }, + { url = "https://files.pythonhosted.org/packages/be/b4/c56878d0d1755cf9caa54ba71e5d049479c52f9e4afc230f06822162ab2f/cffi-2.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", 
hash = "sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c", size = 221593, upload-time = "2025-09-08T23:23:31.91Z" }, + { url = "https://files.pythonhosted.org/packages/e0/0d/eb704606dfe8033e7128df5e90fee946bbcb64a04fcdaa97321309004000/cffi-2.0.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8", size = 209354, upload-time = "2025-09-08T23:23:33.214Z" }, + { url = "https://files.pythonhosted.org/packages/d8/19/3c435d727b368ca475fb8742ab97c9cb13a0de600ce86f62eab7fa3eea60/cffi-2.0.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc", size = 208480, upload-time = "2025-09-08T23:23:34.495Z" }, + { url = "https://files.pythonhosted.org/packages/d0/44/681604464ed9541673e486521497406fadcc15b5217c3e326b061696899a/cffi-2.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592", size = 221584, upload-time = "2025-09-08T23:23:36.096Z" }, + { url = "https://files.pythonhosted.org/packages/25/8e/342a504ff018a2825d395d44d63a767dd8ebc927ebda557fecdaca3ac33a/cffi-2.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512", size = 224443, upload-time = "2025-09-08T23:23:37.328Z" }, + { url = "https://files.pythonhosted.org/packages/e1/5e/b666bacbbc60fbf415ba9988324a132c9a7a0448a9a8f125074671c0f2c3/cffi-2.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4", size = 223437, upload-time = "2025-09-08T23:23:38.945Z" }, + { url = "https://files.pythonhosted.org/packages/a0/1d/ec1a60bd1a10daa292d3cd6bb0b359a81607154fb8165f3ec95fe003b85c/cffi-2.0.0-cp314-cp314t-win32.whl", hash = "sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e", size = 
180487, upload-time = "2025-09-08T23:23:40.423Z" }, + { url = "https://files.pythonhosted.org/packages/bf/41/4c1168c74fac325c0c8156f04b6749c8b6a8f405bbf91413ba088359f60d/cffi-2.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6", size = 191726, upload-time = "2025-09-08T23:23:41.742Z" }, + { url = "https://files.pythonhosted.org/packages/ae/3a/dbeec9d1ee0844c679f6bb5d6ad4e9f198b1224f4e7a32825f47f6192b0c/cffi-2.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9", size = 184195, upload-time = "2025-09-08T23:23:43.004Z" }, ] [[package]] @@ -464,37 +683,44 @@ wheels = [ [[package]] name = "charset-normalizer" -version = "3.4.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, - { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, - { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", 
hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, - { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, - { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, - { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, - { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, - { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, - { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, - { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, - { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, - { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, - { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, - { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, - { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, - { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, - { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = 
"sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, - { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, - { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" }, - { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, - { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, - { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, - { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, 
upload-time = "2025-05-02T08:33:18.753Z" }, - { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, +version = "3.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/83/2d/5fd176ceb9b2fc619e63405525573493ca23441330fcdaee6bef9460e924/charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14", size = 122371, upload-time = "2025-08-09T07:57:28.46Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5e/14c94999e418d9b87682734589404a25854d5f5d0408df68bc15b6ff54bb/charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1", size = 205655, upload-time = "2025-08-09T07:56:08.475Z" }, + { url = "https://files.pythonhosted.org/packages/7d/a8/c6ec5d389672521f644505a257f50544c074cf5fc292d5390331cd6fc9c3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884", size = 146223, upload-time = "2025-08-09T07:56:09.708Z" }, + { url = "https://files.pythonhosted.org/packages/fc/eb/a2ffb08547f4e1e5415fb69eb7db25932c52a52bed371429648db4d84fb1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018", size = 159366, upload-time = "2025-08-09T07:56:11.326Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/10/0fd19f20c624b278dddaf83b8464dcddc2456cb4b02bb902a6da126b87a1/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392", size = 157104, upload-time = "2025-08-09T07:56:13.014Z" }, + { url = "https://files.pythonhosted.org/packages/16/ab/0233c3231af734f5dfcf0844aa9582d5a1466c985bbed6cedab85af9bfe3/charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f", size = 151830, upload-time = "2025-08-09T07:56:14.428Z" }, + { url = "https://files.pythonhosted.org/packages/ae/02/e29e22b4e02839a0e4a06557b1999d0a47db3567e82989b5bb21f3fbbd9f/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154", size = 148854, upload-time = "2025-08-09T07:56:16.051Z" }, + { url = "https://files.pythonhosted.org/packages/05/6b/e2539a0a4be302b481e8cafb5af8792da8093b486885a1ae4d15d452bcec/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491", size = 160670, upload-time = "2025-08-09T07:56:17.314Z" }, + { url = "https://files.pythonhosted.org/packages/31/e7/883ee5676a2ef217a40ce0bffcc3d0dfbf9e64cbcfbdf822c52981c3304b/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93", size = 158501, upload-time = "2025-08-09T07:56:18.641Z" }, + { url = "https://files.pythonhosted.org/packages/c1/35/6525b21aa0db614cf8b5792d232021dca3df7f90a1944db934efa5d20bb1/charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f", size = 153173, upload-time = 
"2025-08-09T07:56:20.289Z" }, + { url = "https://files.pythonhosted.org/packages/50/ee/f4704bad8201de513fdc8aac1cabc87e38c5818c93857140e06e772b5892/charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37", size = 99822, upload-time = "2025-08-09T07:56:21.551Z" }, + { url = "https://files.pythonhosted.org/packages/39/f5/3b3836ca6064d0992c58c7561c6b6eee1b3892e9665d650c803bd5614522/charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc", size = 107543, upload-time = "2025-08-09T07:56:23.115Z" }, + { url = "https://files.pythonhosted.org/packages/65/ca/2135ac97709b400c7654b4b764daf5c5567c2da45a30cdd20f9eefe2d658/charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe", size = 205326, upload-time = "2025-08-09T07:56:24.721Z" }, + { url = "https://files.pythonhosted.org/packages/71/11/98a04c3c97dd34e49c7d247083af03645ca3730809a5509443f3c37f7c99/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8", size = 146008, upload-time = "2025-08-09T07:56:26.004Z" }, + { url = "https://files.pythonhosted.org/packages/60/f5/4659a4cb3c4ec146bec80c32d8bb16033752574c20b1252ee842a95d1a1e/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9", size = 159196, upload-time = "2025-08-09T07:56:27.25Z" }, + { url = "https://files.pythonhosted.org/packages/86/9e/f552f7a00611f168b9a5865a1414179b2c6de8235a4fa40189f6f79a1753/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31", size = 156819, upload-time = "2025-08-09T07:56:28.515Z" }, + { url = "https://files.pythonhosted.org/packages/7e/95/42aa2156235cbc8fa61208aded06ef46111c4d3f0de233107b3f38631803/charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f", size = 151350, upload-time = "2025-08-09T07:56:29.716Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a9/3865b02c56f300a6f94fc631ef54f0a8a29da74fb45a773dfd3dcd380af7/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927", size = 148644, upload-time = "2025-08-09T07:56:30.984Z" }, + { url = "https://files.pythonhosted.org/packages/77/d9/cbcf1a2a5c7d7856f11e7ac2d782aec12bdfea60d104e60e0aa1c97849dc/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9", size = 160468, upload-time = "2025-08-09T07:56:32.252Z" }, + { url = "https://files.pythonhosted.org/packages/f6/42/6f45efee8697b89fda4d50580f292b8f7f9306cb2971d4b53f8914e4d890/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5", size = 158187, upload-time = "2025-08-09T07:56:33.481Z" }, + { url = "https://files.pythonhosted.org/packages/70/99/f1c3bdcfaa9c45b3ce96f70b14f070411366fa19549c1d4832c935d8e2c3/charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc", size = 152699, upload-time = "2025-08-09T07:56:34.739Z" }, + { url = "https://files.pythonhosted.org/packages/a3/ad/b0081f2f99a4b194bcbb1934ef3b12aa4d9702ced80a37026b7607c72e58/charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = 
"sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce", size = 99580, upload-time = "2025-08-09T07:56:35.981Z" }, + { url = "https://files.pythonhosted.org/packages/9a/8f/ae790790c7b64f925e5c953b924aaa42a243fb778fed9e41f147b2a5715a/charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef", size = 107366, upload-time = "2025-08-09T07:56:37.339Z" }, + { url = "https://files.pythonhosted.org/packages/8e/91/b5a06ad970ddc7a0e513112d40113e834638f4ca1120eb727a249fb2715e/charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15", size = 204342, upload-time = "2025-08-09T07:56:38.687Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ec/1edc30a377f0a02689342f214455c3f6c2fbedd896a1d2f856c002fc3062/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db", size = 145995, upload-time = "2025-08-09T07:56:40.048Z" }, + { url = "https://files.pythonhosted.org/packages/17/e5/5e67ab85e6d22b04641acb5399c8684f4d37caf7558a53859f0283a650e9/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d", size = 158640, upload-time = "2025-08-09T07:56:41.311Z" }, + { url = "https://files.pythonhosted.org/packages/f1/e5/38421987f6c697ee3722981289d554957c4be652f963d71c5e46a262e135/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096", size = 156636, upload-time = "2025-08-09T07:56:43.195Z" }, + { url = 
"https://files.pythonhosted.org/packages/a0/e4/5a075de8daa3ec0745a9a3b54467e0c2967daaaf2cec04c845f73493e9a1/charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa", size = 150939, upload-time = "2025-08-09T07:56:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/02/f7/3611b32318b30974131db62b4043f335861d4d9b49adc6d57c1149cc49d4/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049", size = 148580, upload-time = "2025-08-09T07:56:46.684Z" }, + { url = "https://files.pythonhosted.org/packages/7e/61/19b36f4bd67f2793ab6a99b979b4e4f3d8fc754cbdffb805335df4337126/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0", size = 159870, upload-time = "2025-08-09T07:56:47.941Z" }, + { url = "https://files.pythonhosted.org/packages/06/57/84722eefdd338c04cf3030ada66889298eaedf3e7a30a624201e0cbe424a/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92", size = 157797, upload-time = "2025-08-09T07:56:49.756Z" }, + { url = "https://files.pythonhosted.org/packages/72/2a/aff5dd112b2f14bcc3462c312dce5445806bfc8ab3a7328555da95330e4b/charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16", size = 152224, upload-time = "2025-08-09T07:56:51.369Z" }, + { url = "https://files.pythonhosted.org/packages/b7/8c/9839225320046ed279c6e839d51f028342eb77c91c89b8ef2549f951f3ec/charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce", size = 100086, upload-time = "2025-08-09T07:56:52.722Z" }, + { url = 
"https://files.pythonhosted.org/packages/ee/7a/36fbcf646e41f710ce0a563c1c9a343c6edf9be80786edeb15b6f62e17db/charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c", size = 107400, upload-time = "2025-08-09T07:56:55.172Z" }, + { url = "https://files.pythonhosted.org/packages/8a/1f/f041989e93b001bc4e44bb1669ccdcf54d3f00e628229a85b08d330615c5/charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a", size = 53175, upload-time = "2025-08-09T07:57:26.864Z" }, ] [[package]] @@ -538,111 +764,160 @@ wheels = [ [[package]] name = "colorful" -version = "0.5.6" +version = "0.5.7" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fa/5f/38e40c3bc4107c39e4062d943026b8ee25154cb4b185b882f274a1ab65da/colorful-0.5.6.tar.gz", hash = "sha256:b56d5c01db1dac4898308ea889edcb113fbee3e6ec5df4bacffd61d5241b5b8d", size = 209280, upload-time = "2024-01-07T19:38:54.904Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0c/0c/d180ebf230b771907f46981023a80f62cf592d49673cc5f8a5993aa67bb6/colorful-0.5.7.tar.gz", hash = "sha256:c5452179b56601c178b03d468a5326cc1fe37d9be81d24d0d6bdab36c4b93ad8", size = 209487, upload-time = "2025-06-30T15:24:03.936Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/61/39e7db0cb326c9c8f6a49fad4fc9c2f1241f05a4e10f0643fc31ce26a7e0/colorful-0.5.6-py2.py3-none-any.whl", hash = "sha256:eab8c1c809f5025ad2b5238a50bd691e26850da8cac8f90d660ede6ea1af9f1e", size = 201369, upload-time = "2024-01-07T19:38:53.29Z" }, + { url = "https://files.pythonhosted.org/packages/e2/98/0d791b3d1eaed89d7d370b5cf9b8079b124da0545559417f394ba21b5532/colorful-0.5.7-py2.py3-none-any.whl", hash = "sha256:495dd3a23151a9568cee8a90fc1174c902ad7ef06655f50b6bddf9e80008da69", size = 201475, upload-time 
= "2025-06-30T15:24:02.693Z" }, ] [[package]] name = "compressed-tensors" -version = "0.9.4" +version = "0.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "frozendict" }, { name = "pydantic" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, { name = "transformers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/af/cb/1de205e8018cb21a4dc995324652115cf209357de5aaed8d6af101d26b42/compressed_tensors-0.9.4.tar.gz", hash = "sha256:34779417ffa31a207adb0cc4fd2a86cb75e239e504fb2068e494092f4b5703b7", size = 111223, upload-time = "2025-04-24T19:19:07.826Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b8/99/3fdabfc95609d6efdf02fa7f1ed0245524cb1209d3d4a17109d3205d2eed/compressed_tensors-0.11.0.tar.gz", hash = "sha256:95ddf19699f775df6494dd864e5f52e8a24f8015496520190c1a22c6cfc44b1f", size = 187566, upload-time = "2025-08-19T18:59:31.854Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d8/98/bf09fd8196e0b658e7b48404ed1b60544b5111f80731d76b378e3d8765bb/compressed_tensors-0.9.4-py3-none-any.whl", hash = "sha256:b12e3616f06243a074f61b736596882c6549cdc3669ac48434102a4a88e8002a", size = 100345, upload-time = "2025-04-24T19:19:05.415Z" }, + { url = "https://files.pythonhosted.org/packages/d2/81/e3073017a8f5c75169e79108eda209e6089e3f96c9f197d307cbda7df71c/compressed_tensors-0.11.0-py3-none-any.whl", hash = "sha256:e1cbc46e1ae032b7ceea915fe18c8d2de5a54d3a50a607969b6bdfe703b6cb83", size = 179951, upload-time = "2025-08-19T18:59:29.308Z" }, ] [[package]] name = "contourpy" -version = "1.3.2" +version = "1.3.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/66/54/eb9bfc647b19f2009dd5c7f5ec51c4e6ca831725f1aea7a993034f483147/contourpy-1.3.2.tar.gz", hash = "sha256:b6945942715a034c671b7fc54f9588126b0b8bf23db2696e3ca8328f3ff0ab54", size = 13466130, upload-time = "2025-04-15T17:47:53.79Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/f7/44785876384eff370c251d58fd65f6ad7f39adce4a093c934d4a67a7c6b6/contourpy-1.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4caf2bcd2969402bf77edc4cb6034c7dd7c0803213b3523f111eb7460a51b8d2", size = 271580, upload-time = "2025-04-15T17:37:03.105Z" }, - { url = "https://files.pythonhosted.org/packages/93/3b/0004767622a9826ea3d95f0e9d98cd8729015768075d61f9fea8eeca42a8/contourpy-1.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:82199cb78276249796419fe36b7386bd8d2cc3f28b3bc19fe2454fe2e26c4c15", size = 255530, upload-time = "2025-04-15T17:37:07.026Z" }, - { url = "https://files.pythonhosted.org/packages/e7/bb/7bd49e1f4fa805772d9fd130e0d375554ebc771ed7172f48dfcd4ca61549/contourpy-1.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:106fab697af11456fcba3e352ad50effe493a90f893fca6c2ca5c033820cea92", size = 307688, upload-time = "2025-04-15T17:37:11.481Z" }, - { url = "https://files.pythonhosted.org/packages/fc/97/e1d5dbbfa170725ef78357a9a0edc996b09ae4af170927ba8ce977e60a5f/contourpy-1.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d14f12932a8d620e307f715857107b1d1845cc44fdb5da2bc8e850f5ceba9f87", size = 347331, upload-time = "2025-04-15T17:37:18.212Z" }, - { url = "https://files.pythonhosted.org/packages/6f/66/e69e6e904f5ecf6901be3dd16e7e54d41b6ec6ae3405a535286d4418ffb4/contourpy-1.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:532fd26e715560721bb0d5fc7610fce279b3699b018600ab999d1be895b09415", size = 318963, upload-time = "2025-04-15T17:37:22.76Z" }, - { url = 
"https://files.pythonhosted.org/packages/a8/32/b8a1c8965e4f72482ff2d1ac2cd670ce0b542f203c8e1d34e7c3e6925da7/contourpy-1.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f26b383144cf2d2c29f01a1e8170f50dacf0eac02d64139dcd709a8ac4eb3cfe", size = 323681, upload-time = "2025-04-15T17:37:33.001Z" }, - { url = "https://files.pythonhosted.org/packages/30/c6/12a7e6811d08757c7162a541ca4c5c6a34c0f4e98ef2b338791093518e40/contourpy-1.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c49f73e61f1f774650a55d221803b101d966ca0c5a2d6d5e4320ec3997489441", size = 1308674, upload-time = "2025-04-15T17:37:48.64Z" }, - { url = "https://files.pythonhosted.org/packages/2a/8a/bebe5a3f68b484d3a2b8ffaf84704b3e343ef1addea528132ef148e22b3b/contourpy-1.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3d80b2c0300583228ac98d0a927a1ba6a2ba6b8a742463c564f1d419ee5b211e", size = 1380480, upload-time = "2025-04-15T17:38:06.7Z" }, - { url = "https://files.pythonhosted.org/packages/34/db/fcd325f19b5978fb509a7d55e06d99f5f856294c1991097534360b307cf1/contourpy-1.3.2-cp312-cp312-win32.whl", hash = "sha256:90df94c89a91b7362e1142cbee7568f86514412ab8a2c0d0fca72d7e91b62912", size = 178489, upload-time = "2025-04-15T17:38:10.338Z" }, - { url = "https://files.pythonhosted.org/packages/01/c8/fadd0b92ffa7b5eb5949bf340a63a4a496a6930a6c37a7ba0f12acb076d6/contourpy-1.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:8c942a01d9163e2e5cfb05cb66110121b8d07ad438a17f9e766317bcb62abf73", size = 223042, upload-time = "2025-04-15T17:38:14.239Z" }, - { url = "https://files.pythonhosted.org/packages/2e/61/5673f7e364b31e4e7ef6f61a4b5121c5f170f941895912f773d95270f3a2/contourpy-1.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:de39db2604ae755316cb5967728f4bea92685884b1e767b7c24e983ef5f771cb", size = 271630, upload-time = "2025-04-15T17:38:19.142Z" }, - { url = 
"https://files.pythonhosted.org/packages/ff/66/a40badddd1223822c95798c55292844b7e871e50f6bfd9f158cb25e0bd39/contourpy-1.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3f9e896f447c5c8618f1edb2bafa9a4030f22a575ec418ad70611450720b5b08", size = 255670, upload-time = "2025-04-15T17:38:23.688Z" }, - { url = "https://files.pythonhosted.org/packages/1e/c7/cf9fdee8200805c9bc3b148f49cb9482a4e3ea2719e772602a425c9b09f8/contourpy-1.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71e2bd4a1c4188f5c2b8d274da78faab884b59df20df63c34f74aa1813c4427c", size = 306694, upload-time = "2025-04-15T17:38:28.238Z" }, - { url = "https://files.pythonhosted.org/packages/dd/e7/ccb9bec80e1ba121efbffad7f38021021cda5be87532ec16fd96533bb2e0/contourpy-1.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de425af81b6cea33101ae95ece1f696af39446db9682a0b56daaa48cfc29f38f", size = 345986, upload-time = "2025-04-15T17:38:33.502Z" }, - { url = "https://files.pythonhosted.org/packages/dc/49/ca13bb2da90391fa4219fdb23b078d6065ada886658ac7818e5441448b78/contourpy-1.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:977e98a0e0480d3fe292246417239d2d45435904afd6d7332d8455981c408b85", size = 318060, upload-time = "2025-04-15T17:38:38.672Z" }, - { url = "https://files.pythonhosted.org/packages/c8/65/5245ce8c548a8422236c13ffcdcdada6a2a812c361e9e0c70548bb40b661/contourpy-1.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:434f0adf84911c924519d2b08fc10491dd282b20bdd3fa8f60fd816ea0b48841", size = 322747, upload-time = "2025-04-15T17:38:43.712Z" }, - { url = "https://files.pythonhosted.org/packages/72/30/669b8eb48e0a01c660ead3752a25b44fdb2e5ebc13a55782f639170772f9/contourpy-1.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c66c4906cdbc50e9cba65978823e6e00b45682eb09adbb78c9775b74eb222422", size = 1308895, upload-time = "2025-04-15T17:39:00.224Z" }, - { url = 
"https://files.pythonhosted.org/packages/05/5a/b569f4250decee6e8d54498be7bdf29021a4c256e77fe8138c8319ef8eb3/contourpy-1.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b7fc0cd78ba2f4695fd0a6ad81a19e7e3ab825c31b577f384aa9d7817dc3bef", size = 1379098, upload-time = "2025-04-15T17:43:29.649Z" }, - { url = "https://files.pythonhosted.org/packages/19/ba/b227c3886d120e60e41b28740ac3617b2f2b971b9f601c835661194579f1/contourpy-1.3.2-cp313-cp313-win32.whl", hash = "sha256:15ce6ab60957ca74cff444fe66d9045c1fd3e92c8936894ebd1f3eef2fff075f", size = 178535, upload-time = "2025-04-15T17:44:44.532Z" }, - { url = "https://files.pythonhosted.org/packages/12/6e/2fed56cd47ca739b43e892707ae9a13790a486a3173be063681ca67d2262/contourpy-1.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e1578f7eafce927b168752ed7e22646dad6cd9bca673c60bff55889fa236ebf9", size = 223096, upload-time = "2025-04-15T17:44:48.194Z" }, - { url = "https://files.pythonhosted.org/packages/54/4c/e76fe2a03014a7c767d79ea35c86a747e9325537a8b7627e0e5b3ba266b4/contourpy-1.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0475b1f6604896bc7c53bb070e355e9321e1bc0d381735421a2d2068ec56531f", size = 285090, upload-time = "2025-04-15T17:43:34.084Z" }, - { url = "https://files.pythonhosted.org/packages/7b/e2/5aba47debd55d668e00baf9651b721e7733975dc9fc27264a62b0dd26eb8/contourpy-1.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:c85bb486e9be652314bb5b9e2e3b0d1b2e643d5eec4992c0fbe8ac71775da739", size = 268643, upload-time = "2025-04-15T17:43:38.626Z" }, - { url = "https://files.pythonhosted.org/packages/a1/37/cd45f1f051fe6230f751cc5cdd2728bb3a203f5619510ef11e732109593c/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:745b57db7758f3ffc05a10254edd3182a2a83402a89c00957a8e8a22f5582823", size = 310443, upload-time = "2025-04-15T17:43:44.522Z" }, - { url = 
"https://files.pythonhosted.org/packages/8b/a2/36ea6140c306c9ff6dd38e3bcec80b3b018474ef4d17eb68ceecd26675f4/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:970e9173dbd7eba9b4e01aab19215a48ee5dd3f43cef736eebde064a171f89a5", size = 349865, upload-time = "2025-04-15T17:43:49.545Z" }, - { url = "https://files.pythonhosted.org/packages/95/b7/2fc76bc539693180488f7b6cc518da7acbbb9e3b931fd9280504128bf956/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6c4639a9c22230276b7bffb6a850dfc8258a2521305e1faefe804d006b2e532", size = 321162, upload-time = "2025-04-15T17:43:54.203Z" }, - { url = "https://files.pythonhosted.org/packages/f4/10/76d4f778458b0aa83f96e59d65ece72a060bacb20cfbee46cf6cd5ceba41/contourpy-1.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc829960f34ba36aad4302e78eabf3ef16a3a100863f0d4eeddf30e8a485a03b", size = 327355, upload-time = "2025-04-15T17:44:01.025Z" }, - { url = "https://files.pythonhosted.org/packages/43/a3/10cf483ea683f9f8ab096c24bad3cce20e0d1dd9a4baa0e2093c1c962d9d/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d32530b534e986374fc19eaa77fcb87e8a99e5431499949b828312bdcd20ac52", size = 1307935, upload-time = "2025-04-15T17:44:17.322Z" }, - { url = "https://files.pythonhosted.org/packages/78/73/69dd9a024444489e22d86108e7b913f3528f56cfc312b5c5727a44188471/contourpy-1.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e298e7e70cf4eb179cc1077be1c725b5fd131ebc81181bf0c03525c8abc297fd", size = 1372168, upload-time = "2025-04-15T17:44:33.43Z" }, - { url = "https://files.pythonhosted.org/packages/0f/1b/96d586ccf1b1a9d2004dd519b25fbf104a11589abfd05484ff12199cca21/contourpy-1.3.2-cp313-cp313t-win32.whl", hash = "sha256:d0e589ae0d55204991450bb5c23f571c64fe43adaa53f93fc902a84c96f52fe1", size = 189550, upload-time = "2025-04-15T17:44:37.092Z" }, - { url = 
"https://files.pythonhosted.org/packages/b0/e6/6000d0094e8a5e32ad62591c8609e269febb6e4db83a1c75ff8868b42731/contourpy-1.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:78e9253c3de756b3f6a5174d024c4835acd59eb3f8e2ca13e775dbffe1558f69", size = 238214, upload-time = "2025-04-15T17:44:40.827Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" }, + { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" }, + { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" }, + { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" }, + { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" }, + { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" }, + { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" }, + { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" }, + { url = "https://files.pythonhosted.org/packages/68/35/0167aad910bbdb9599272bd96d01a9ec6852f36b9455cf2ca67bd4cc2d23/contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5", size = 293257, upload-time = "2025-07-26T12:01:39.367Z" }, + { url = "https://files.pythonhosted.org/packages/96/e4/7adcd9c8362745b2210728f209bfbcf7d91ba868a2c5f40d8b58f54c509b/contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1", size = 274034, upload-time = "2025-07-26T12:01:40.645Z" }, + { url = "https://files.pythonhosted.org/packages/73/23/90e31ceeed1de63058a02cb04b12f2de4b40e3bef5e082a7c18d9c8ae281/contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286", size = 334672, upload-time = "2025-07-26T12:01:41.942Z" }, + { url = "https://files.pythonhosted.org/packages/ed/93/b43d8acbe67392e659e1d984700e79eb67e2acb2bd7f62012b583a7f1b55/contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5", size = 381234, upload-time = "2025-07-26T12:01:43.499Z" }, + { url = "https://files.pythonhosted.org/packages/46/3b/bec82a3ea06f66711520f75a40c8fc0b113b2a75edb36aa633eb11c4f50f/contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67", size = 385169, upload-time = "2025-07-26T12:01:45.219Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/32/e0f13a1c5b0f8572d0ec6ae2f6c677b7991fafd95da523159c19eff0696a/contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9", size = 362859, upload-time = "2025-07-26T12:01:46.519Z" }, + { url = "https://files.pythonhosted.org/packages/33/71/e2a7945b7de4e58af42d708a219f3b2f4cff7386e6b6ab0a0fa0033c49a9/contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659", size = 1332062, upload-time = "2025-07-26T12:01:48.964Z" }, + { url = "https://files.pythonhosted.org/packages/12/fc/4e87ac754220ccc0e807284f88e943d6d43b43843614f0a8afa469801db0/contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7", size = 1403932, upload-time = "2025-07-26T12:01:51.979Z" }, + { url = "https://files.pythonhosted.org/packages/a6/2e/adc197a37443f934594112222ac1aa7dc9a98faf9c3842884df9a9d8751d/contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d", size = 185024, upload-time = "2025-07-26T12:01:53.245Z" }, + { url = "https://files.pythonhosted.org/packages/18/0b/0098c214843213759692cc638fce7de5c289200a830e5035d1791d7a2338/contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263", size = 226578, upload-time = "2025-07-26T12:01:54.422Z" }, + { url = "https://files.pythonhosted.org/packages/8a/9a/2f6024a0c5995243cd63afdeb3651c984f0d2bc727fd98066d40e141ad73/contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9", size = 193524, upload-time = "2025-07-26T12:01:55.73Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/b3/f8a1a86bd3298513f500e5b1f5fd92b69896449f6cab6a146a5d52715479/contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d", size = 306730, upload-time = "2025-07-26T12:01:57.051Z" }, + { url = "https://files.pythonhosted.org/packages/3f/11/4780db94ae62fc0c2053909b65dc3246bd7cecfc4f8a20d957ad43aa4ad8/contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216", size = 287897, upload-time = "2025-07-26T12:01:58.663Z" }, + { url = "https://files.pythonhosted.org/packages/ae/15/e59f5f3ffdd6f3d4daa3e47114c53daabcb18574a26c21f03dc9e4e42ff0/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae", size = 326751, upload-time = "2025-07-26T12:02:00.343Z" }, + { url = "https://files.pythonhosted.org/packages/0f/81/03b45cfad088e4770b1dcf72ea78d3802d04200009fb364d18a493857210/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20", size = 375486, upload-time = "2025-07-26T12:02:02.128Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ba/49923366492ffbdd4486e970d421b289a670ae8cf539c1ea9a09822b371a/contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99", size = 388106, upload-time = "2025-07-26T12:02:03.615Z" }, + { url = "https://files.pythonhosted.org/packages/9f/52/5b00ea89525f8f143651f9f03a0df371d3cbd2fccd21ca9b768c7a6500c2/contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b", size = 352548, upload-time = "2025-07-26T12:02:05.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/1d/a209ec1a3a3452d490f6b14dd92e72280c99ae3d1e73da74f8277d4ee08f/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a", size = 1322297, upload-time = "2025-07-26T12:02:07.379Z" }, + { url = "https://files.pythonhosted.org/packages/bc/9e/46f0e8ebdd884ca0e8877e46a3f4e633f6c9c8c4f3f6e72be3fe075994aa/contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e", size = 1391023, upload-time = "2025-07-26T12:02:10.171Z" }, + { url = "https://files.pythonhosted.org/packages/b9/70/f308384a3ae9cd2209e0849f33c913f658d3326900d0ff5d378d6a1422d2/contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3", size = 196157, upload-time = "2025-07-26T12:02:11.488Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dd/880f890a6663b84d9e34a6f88cded89d78f0091e0045a284427cb6b18521/contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8", size = 240570, upload-time = "2025-07-26T12:02:12.754Z" }, + { url = "https://files.pythonhosted.org/packages/80/99/2adc7d8ffead633234817ef8e9a87115c8a11927a94478f6bb3d3f4d4f7d/contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301", size = 199713, upload-time = "2025-07-26T12:02:14.4Z" }, + { url = "https://files.pythonhosted.org/packages/72/8b/4546f3ab60f78c514ffb7d01a0bd743f90de36f0019d1be84d0a708a580a/contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a", size = 292189, upload-time = "2025-07-26T12:02:16.095Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/e1/3542a9cb596cadd76fcef413f19c79216e002623158befe6daa03dbfa88c/contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77", size = 273251, upload-time = "2025-07-26T12:02:17.524Z" }, + { url = "https://files.pythonhosted.org/packages/b1/71/f93e1e9471d189f79d0ce2497007731c1e6bf9ef6d1d61b911430c3db4e5/contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5", size = 335810, upload-time = "2025-07-26T12:02:18.9Z" }, + { url = "https://files.pythonhosted.org/packages/91/f9/e35f4c1c93f9275d4e38681a80506b5510e9327350c51f8d4a5a724d178c/contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4", size = 382871, upload-time = "2025-07-26T12:02:20.418Z" }, + { url = "https://files.pythonhosted.org/packages/b5/71/47b512f936f66a0a900d81c396a7e60d73419868fba959c61efed7a8ab46/contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36", size = 386264, upload-time = "2025-07-26T12:02:21.916Z" }, + { url = "https://files.pythonhosted.org/packages/04/5f/9ff93450ba96b09c7c2b3f81c94de31c89f92292f1380261bd7195bea4ea/contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3", size = 363819, upload-time = "2025-07-26T12:02:23.759Z" }, + { url = "https://files.pythonhosted.org/packages/3e/a6/0b185d4cc480ee494945cde102cb0149ae830b5fa17bf855b95f2e70ad13/contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b", size = 1333650, upload-time = "2025-07-26T12:02:26.181Z" }, + { url = 
"https://files.pythonhosted.org/packages/43/d7/afdc95580ca56f30fbcd3060250f66cedbde69b4547028863abd8aa3b47e/contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36", size = 1404833, upload-time = "2025-07-26T12:02:28.782Z" }, + { url = "https://files.pythonhosted.org/packages/e2/e2/366af18a6d386f41132a48f033cbd2102e9b0cf6345d35ff0826cd984566/contourpy-1.3.3-cp314-cp314-win32.whl", hash = "sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d", size = 189692, upload-time = "2025-07-26T12:02:30.128Z" }, + { url = "https://files.pythonhosted.org/packages/7d/c2/57f54b03d0f22d4044b8afb9ca0e184f8b1afd57b4f735c2fa70883dc601/contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd", size = 232424, upload-time = "2025-07-26T12:02:31.395Z" }, + { url = "https://files.pythonhosted.org/packages/18/79/a9416650df9b525737ab521aa181ccc42d56016d2123ddcb7b58e926a42c/contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339", size = 198300, upload-time = "2025-07-26T12:02:32.956Z" }, + { url = "https://files.pythonhosted.org/packages/1f/42/38c159a7d0f2b7b9c04c64ab317042bb6952b713ba875c1681529a2932fe/contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772", size = 306769, upload-time = "2025-07-26T12:02:34.2Z" }, + { url = "https://files.pythonhosted.org/packages/c3/6c/26a8205f24bca10974e77460de68d3d7c63e282e23782f1239f226fcae6f/contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77", size = 287892, upload-time = "2025-07-26T12:02:35.807Z" }, + { url = 
"https://files.pythonhosted.org/packages/66/06/8a475c8ab718ebfd7925661747dbb3c3ee9c82ac834ccb3570be49d129f4/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13", size = 326748, upload-time = "2025-07-26T12:02:37.193Z" }, + { url = "https://files.pythonhosted.org/packages/b4/a3/c5ca9f010a44c223f098fccd8b158bb1cb287378a31ac141f04730dc49be/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe", size = 375554, upload-time = "2025-07-26T12:02:38.894Z" }, + { url = "https://files.pythonhosted.org/packages/80/5b/68bd33ae63fac658a4145088c1e894405e07584a316738710b636c6d0333/contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f", size = 388118, upload-time = "2025-07-26T12:02:40.642Z" }, + { url = "https://files.pythonhosted.org/packages/40/52/4c285a6435940ae25d7410a6c36bda5145839bc3f0beb20c707cda18b9d2/contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0", size = 352555, upload-time = "2025-07-26T12:02:42.25Z" }, + { url = "https://files.pythonhosted.org/packages/24/ee/3e81e1dd174f5c7fefe50e85d0892de05ca4e26ef1c9a59c2a57e43b865a/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4", size = 1322295, upload-time = "2025-07-26T12:02:44.668Z" }, + { url = "https://files.pythonhosted.org/packages/3c/b2/6d913d4d04e14379de429057cd169e5e00f6c2af3bb13e1710bcbdb5da12/contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f", size = 1391027, upload-time = "2025-07-26T12:02:47.09Z" }, + { url = 
"https://files.pythonhosted.org/packages/93/8a/68a4ec5c55a2971213d29a9374913f7e9f18581945a7a31d1a39b5d2dfe5/contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae", size = 202428, upload-time = "2025-07-26T12:02:48.691Z" }, + { url = "https://files.pythonhosted.org/packages/fa/96/fd9f641ffedc4fa3ace923af73b9d07e869496c9cc7a459103e6e978992f/contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc", size = 250331, upload-time = "2025-07-26T12:02:50.137Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8c/469afb6465b853afff216f9528ffda78a915ff880ed58813ba4faf4ba0b6/contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b", size = 203831, upload-time = "2025-07-26T12:02:51.449Z" }, ] [[package]] name = "coverage" -version = "7.9.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/e0/98670a80884f64578f0c22cd70c5e81a6e07b08167721c7487b4d70a7ca0/coverage-7.9.1.tar.gz", hash = "sha256:6cf43c78c4282708a28e466316935ec7489a9c487518a77fa68f716c67909cec", size = 813650, upload-time = "2025-06-13T13:02:28.627Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/d9/7f66eb0a8f2fce222de7bdc2046ec41cb31fe33fb55a330037833fb88afc/coverage-7.9.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a8de12b4b87c20de895f10567639c0797b621b22897b0af3ce4b4e204a743626", size = 212336, upload-time = "2025-06-13T13:01:10.909Z" }, - { url = "https://files.pythonhosted.org/packages/20/20/e07cb920ef3addf20f052ee3d54906e57407b6aeee3227a9c91eea38a665/coverage-7.9.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5add197315a054e92cee1b5f686a2bcba60c4c3e66ee3de77ace6c867bdee7cb", size = 212571, upload-time = "2025-06-13T13:01:12.518Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/f8/96f155de7e9e248ca9c8ff1a40a521d944ba48bec65352da9be2463745bf/coverage-7.9.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:600a1d4106fe66f41e5d0136dfbc68fe7200a5cbe85610ddf094f8f22e1b0300", size = 246377, upload-time = "2025-06-13T13:01:14.87Z" }, - { url = "https://files.pythonhosted.org/packages/3e/cf/1d783bd05b7bca5c10ded5f946068909372e94615a4416afadfe3f63492d/coverage-7.9.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a876e4c3e5a2a1715a6608906aa5a2e0475b9c0f68343c2ada98110512ab1d8", size = 243394, upload-time = "2025-06-13T13:01:16.23Z" }, - { url = "https://files.pythonhosted.org/packages/02/dd/e7b20afd35b0a1abea09fb3998e1abc9f9bd953bee548f235aebd2b11401/coverage-7.9.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81f34346dd63010453922c8e628a52ea2d2ccd73cb2487f7700ac531b247c8a5", size = 245586, upload-time = "2025-06-13T13:01:17.532Z" }, - { url = "https://files.pythonhosted.org/packages/4e/38/b30b0006fea9d617d1cb8e43b1bc9a96af11eff42b87eb8c716cf4d37469/coverage-7.9.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:888f8eee13f2377ce86d44f338968eedec3291876b0b8a7289247ba52cb984cd", size = 245396, upload-time = "2025-06-13T13:01:19.164Z" }, - { url = "https://files.pythonhosted.org/packages/31/e4/4d8ec1dc826e16791f3daf1b50943e8e7e1eb70e8efa7abb03936ff48418/coverage-7.9.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9969ef1e69b8c8e1e70d591f91bbc37fc9a3621e447525d1602801a24ceda898", size = 243577, upload-time = "2025-06-13T13:01:22.433Z" }, - { url = "https://files.pythonhosted.org/packages/25/f4/b0e96c5c38e6e40ef465c4bc7f138863e2909c00e54a331da335faf0d81a/coverage-7.9.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:60c458224331ee3f1a5b472773e4a085cc27a86a0b48205409d364272d67140d", size = 244809, upload-time = "2025-06-13T13:01:24.143Z" 
}, - { url = "https://files.pythonhosted.org/packages/8a/65/27e0a1fa5e2e5079bdca4521be2f5dabf516f94e29a0defed35ac2382eb2/coverage-7.9.1-cp312-cp312-win32.whl", hash = "sha256:5f646a99a8c2b3ff4c6a6e081f78fad0dde275cd59f8f49dc4eab2e394332e74", size = 214724, upload-time = "2025-06-13T13:01:25.435Z" }, - { url = "https://files.pythonhosted.org/packages/9b/a8/d5b128633fd1a5e0401a4160d02fa15986209a9e47717174f99dc2f7166d/coverage-7.9.1-cp312-cp312-win_amd64.whl", hash = "sha256:30f445f85c353090b83e552dcbbdad3ec84c7967e108c3ae54556ca69955563e", size = 215535, upload-time = "2025-06-13T13:01:27.861Z" }, - { url = "https://files.pythonhosted.org/packages/a3/37/84bba9d2afabc3611f3e4325ee2c6a47cd449b580d4a606b240ce5a6f9bf/coverage-7.9.1-cp312-cp312-win_arm64.whl", hash = "sha256:af41da5dca398d3474129c58cb2b106a5d93bbb196be0d307ac82311ca234342", size = 213904, upload-time = "2025-06-13T13:01:29.202Z" }, - { url = "https://files.pythonhosted.org/packages/d0/a7/a027970c991ca90f24e968999f7d509332daf6b8c3533d68633930aaebac/coverage-7.9.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:31324f18d5969feef7344a932c32428a2d1a3e50b15a6404e97cba1cc9b2c631", size = 212358, upload-time = "2025-06-13T13:01:30.909Z" }, - { url = "https://files.pythonhosted.org/packages/f2/48/6aaed3651ae83b231556750280682528fea8ac7f1232834573472d83e459/coverage-7.9.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0c804506d624e8a20fb3108764c52e0eef664e29d21692afa375e0dd98dc384f", size = 212620, upload-time = "2025-06-13T13:01:32.256Z" }, - { url = "https://files.pythonhosted.org/packages/6c/2a/f4b613f3b44d8b9f144847c89151992b2b6b79cbc506dee89ad0c35f209d/coverage-7.9.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef64c27bc40189f36fcc50c3fb8f16ccda73b6a0b80d9bd6e6ce4cffcd810bbd", size = 245788, upload-time = "2025-06-13T13:01:33.948Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/d2/de4fdc03af5e4e035ef420ed26a703c6ad3d7a07aff2e959eb84e3b19ca8/coverage-7.9.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d4fe2348cc6ec372e25adec0219ee2334a68d2f5222e0cba9c0d613394e12d86", size = 243001, upload-time = "2025-06-13T13:01:35.285Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e8/eed18aa5583b0423ab7f04e34659e51101135c41cd1dcb33ac1d7013a6d6/coverage-7.9.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:34ed2186fe52fcc24d4561041979a0dec69adae7bce2ae8d1c49eace13e55c43", size = 244985, upload-time = "2025-06-13T13:01:36.712Z" }, - { url = "https://files.pythonhosted.org/packages/17/f8/ae9e5cce8885728c934eaa58ebfa8281d488ef2afa81c3dbc8ee9e6d80db/coverage-7.9.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:25308bd3d00d5eedd5ae7d4357161f4df743e3c0240fa773ee1b0f75e6c7c0f1", size = 245152, upload-time = "2025-06-13T13:01:39.303Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c8/272c01ae792bb3af9b30fac14d71d63371db227980682836ec388e2c57c0/coverage-7.9.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73e9439310f65d55a5a1e0564b48e34f5369bee943d72c88378f2d576f5a5751", size = 243123, upload-time = "2025-06-13T13:01:40.727Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d0/2819a1e3086143c094ab446e3bdf07138527a7b88cb235c488e78150ba7a/coverage-7.9.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:37ab6be0859141b53aa89412a82454b482c81cf750de4f29223d52268a86de67", size = 244506, upload-time = "2025-06-13T13:01:42.184Z" }, - { url = "https://files.pythonhosted.org/packages/8b/4e/9f6117b89152df7b6112f65c7a4ed1f2f5ec8e60c4be8f351d91e7acc848/coverage-7.9.1-cp313-cp313-win32.whl", hash = "sha256:64bdd969456e2d02a8b08aa047a92d269c7ac1f47e0c977675d550c9a0863643", size = 214766, upload-time = "2025-06-13T13:01:44.482Z" }, - { url = 
"https://files.pythonhosted.org/packages/27/0f/4b59f7c93b52c2c4ce7387c5a4e135e49891bb3b7408dcc98fe44033bbe0/coverage-7.9.1-cp313-cp313-win_amd64.whl", hash = "sha256:be9e3f68ca9edb897c2184ad0eee815c635565dbe7a0e7e814dc1f7cbab92c0a", size = 215568, upload-time = "2025-06-13T13:01:45.772Z" }, - { url = "https://files.pythonhosted.org/packages/09/1e/9679826336f8c67b9c39a359352882b24a8a7aee48d4c9cad08d38d7510f/coverage-7.9.1-cp313-cp313-win_arm64.whl", hash = "sha256:1c503289ffef1d5105d91bbb4d62cbe4b14bec4d13ca225f9c73cde9bb46207d", size = 213939, upload-time = "2025-06-13T13:01:47.087Z" }, - { url = "https://files.pythonhosted.org/packages/bb/5b/5c6b4e7a407359a2e3b27bf9c8a7b658127975def62077d441b93a30dbe8/coverage-7.9.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0b3496922cb5f4215bf5caaef4cf12364a26b0be82e9ed6d050f3352cf2d7ef0", size = 213079, upload-time = "2025-06-13T13:01:48.554Z" }, - { url = "https://files.pythonhosted.org/packages/a2/22/1e2e07279fd2fd97ae26c01cc2186e2258850e9ec125ae87184225662e89/coverage-7.9.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9565c3ab1c93310569ec0d86b017f128f027cab0b622b7af288696d7ed43a16d", size = 213299, upload-time = "2025-06-13T13:01:49.997Z" }, - { url = "https://files.pythonhosted.org/packages/14/c0/4c5125a4b69d66b8c85986d3321520f628756cf524af810baab0790c7647/coverage-7.9.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2241ad5dbf79ae1d9c08fe52b36d03ca122fb9ac6bca0f34439e99f8327ac89f", size = 256535, upload-time = "2025-06-13T13:01:51.314Z" }, - { url = "https://files.pythonhosted.org/packages/81/8b/e36a04889dda9960be4263e95e777e7b46f1bb4fc32202612c130a20c4da/coverage-7.9.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bb5838701ca68b10ebc0937dbd0eb81974bac54447c55cd58dea5bca8451029", size = 252756, upload-time = "2025-06-13T13:01:54.403Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/82/be04eff8083a09a4622ecd0e1f31a2c563dbea3ed848069e7b0445043a70/coverage-7.9.1-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b30a25f814591a8c0c5372c11ac8967f669b97444c47fd794926e175c4047ece", size = 254912, upload-time = "2025-06-13T13:01:56.769Z" }, - { url = "https://files.pythonhosted.org/packages/0f/25/c26610a2c7f018508a5ab958e5b3202d900422cf7cdca7670b6b8ca4e8df/coverage-7.9.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2d04b16a6062516df97969f1ae7efd0de9c31eb6ebdceaa0d213b21c0ca1a683", size = 256144, upload-time = "2025-06-13T13:01:58.19Z" }, - { url = "https://files.pythonhosted.org/packages/c5/8b/fb9425c4684066c79e863f1e6e7ecebb49e3a64d9f7f7860ef1688c56f4a/coverage-7.9.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7931b9e249edefb07cd6ae10c702788546341d5fe44db5b6108a25da4dca513f", size = 254257, upload-time = "2025-06-13T13:01:59.645Z" }, - { url = "https://files.pythonhosted.org/packages/93/df/27b882f54157fc1131e0e215b0da3b8d608d9b8ef79a045280118a8f98fe/coverage-7.9.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52e92b01041151bf607ee858e5a56c62d4b70f4dac85b8c8cb7fb8a351ab2c10", size = 255094, upload-time = "2025-06-13T13:02:01.37Z" }, - { url = "https://files.pythonhosted.org/packages/41/5f/cad1c3dbed8b3ee9e16fa832afe365b4e3eeab1fb6edb65ebbf745eabc92/coverage-7.9.1-cp313-cp313t-win32.whl", hash = "sha256:684e2110ed84fd1ca5f40e89aa44adf1729dc85444004111aa01866507adf363", size = 215437, upload-time = "2025-06-13T13:02:02.905Z" }, - { url = "https://files.pythonhosted.org/packages/99/4d/fad293bf081c0e43331ca745ff63673badc20afea2104b431cdd8c278b4c/coverage-7.9.1-cp313-cp313t-win_amd64.whl", hash = "sha256:437c576979e4db840539674e68c84b3cda82bc824dd138d56bead1435f1cb5d7", size = 216605, upload-time = "2025-06-13T13:02:05.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/1f/56/4ee027d5965fc7fc126d7ec1187529cc30cc7d740846e1ecb5e92d31b224/coverage-7.9.1-cp313-cp313t-win_arm64.whl", hash = "sha256:18a0912944d70aaf5f399e350445738a1a20b50fbea788f640751c2ed9208b6c", size = 214392, upload-time = "2025-06-13T13:02:07.642Z" }, - { url = "https://files.pythonhosted.org/packages/08/b8/7ddd1e8ba9701dea08ce22029917140e6f66a859427406579fd8d0ca7274/coverage-7.9.1-py3-none-any.whl", hash = "sha256:66b974b145aa189516b6bf2d8423e888b742517d37872f6ee4c5be0073bd9a3c", size = 204000, upload-time = "2025-06-13T13:02:27.173Z" }, +version = "7.10.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/4e/08b493f1f1d8a5182df0044acc970799b58a8d289608e0d891a03e9d269a/coverage-7.10.4.tar.gz", hash = "sha256:25f5130af6c8e7297fd14634955ba9e1697f47143f289e2a23284177c0061d27", size = 823798, upload-time = "2025-08-17T00:26:43.314Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/4a/781c9e4dd57cabda2a28e2ce5b00b6be416015265851060945a5ed4bd85e/coverage-7.10.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:a1f0264abcabd4853d4cb9b3d164adbf1565da7dab1da1669e93f3ea60162d79", size = 216706, upload-time = "2025-08-17T00:24:51.528Z" }, + { url = "https://files.pythonhosted.org/packages/6a/8c/51255202ca03d2e7b664770289f80db6f47b05138e06cce112b3957d5dfd/coverage-7.10.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:536cbe6b118a4df231b11af3e0f974a72a095182ff8ec5f4868c931e8043ef3e", size = 216939, upload-time = "2025-08-17T00:24:53.171Z" }, + { url = "https://files.pythonhosted.org/packages/06/7f/df11131483698660f94d3c847dc76461369782d7a7644fcd72ac90da8fd0/coverage-7.10.4-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:9a4c0d84134797b7bf3f080599d0cd501471f6c98b715405166860d79cfaa97e", size = 248429, upload-time = "2025-08-17T00:24:54.934Z" }, + { url = 
"https://files.pythonhosted.org/packages/eb/fa/13ac5eda7300e160bf98f082e75f5c5b4189bf3a883dd1ee42dbedfdc617/coverage-7.10.4-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7c155fc0f9cee8c9803ea0ad153ab6a3b956baa5d4cd993405dc0b45b2a0b9e0", size = 251178, upload-time = "2025-08-17T00:24:56.353Z" }, + { url = "https://files.pythonhosted.org/packages/9a/bc/f63b56a58ad0bec68a840e7be6b7ed9d6f6288d790760647bb88f5fea41e/coverage-7.10.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a5f2ab6e451d4b07855d8bcf063adf11e199bff421a4ba57f5bb95b7444ca62", size = 252313, upload-time = "2025-08-17T00:24:57.692Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b6/79338f1ea27b01266f845afb4485976211264ab92407d1c307babe3592a7/coverage-7.10.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:685b67d99b945b0c221be0780c336b303a7753b3e0ec0d618c795aada25d5e7a", size = 250230, upload-time = "2025-08-17T00:24:59.293Z" }, + { url = "https://files.pythonhosted.org/packages/bc/93/3b24f1da3e0286a4dc5832427e1d448d5296f8287464b1ff4a222abeeeb5/coverage-7.10.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0c079027e50c2ae44da51c2e294596cbc9dbb58f7ca45b30651c7e411060fc23", size = 248351, upload-time = "2025-08-17T00:25:00.676Z" }, + { url = "https://files.pythonhosted.org/packages/de/5f/d59412f869e49dcc5b89398ef3146c8bfaec870b179cc344d27932e0554b/coverage-7.10.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3749aa72b93ce516f77cf5034d8e3c0dfd45c6e8a163a602ede2dc5f9a0bb927", size = 249788, upload-time = "2025-08-17T00:25:02.354Z" }, + { url = "https://files.pythonhosted.org/packages/cc/52/04a3b733f40a0cc7c4a5b9b010844111dbf906df3e868b13e1ce7b39ac31/coverage-7.10.4-cp312-cp312-win32.whl", hash = "sha256:fecb97b3a52fa9bcd5a7375e72fae209088faf671d39fae67261f37772d5559a", size = 219131, upload-time = "2025-08-17T00:25:03.79Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/dd/12909fc0b83888197b3ec43a4ac7753589591c08d00d9deda4158df2734e/coverage-7.10.4-cp312-cp312-win_amd64.whl", hash = "sha256:26de58f355626628a21fe6a70e1e1fad95702dafebfb0685280962ae1449f17b", size = 219939, upload-time = "2025-08-17T00:25:05.494Z" }, + { url = "https://files.pythonhosted.org/packages/83/c7/058bb3220fdd6821bada9685eadac2940429ab3c97025ce53549ff423cc1/coverage-7.10.4-cp312-cp312-win_arm64.whl", hash = "sha256:67e8885408f8325198862bc487038a4980c9277d753cb8812510927f2176437a", size = 218572, upload-time = "2025-08-17T00:25:06.897Z" }, + { url = "https://files.pythonhosted.org/packages/46/b0/4a3662de81f2ed792a4e425d59c4ae50d8dd1d844de252838c200beed65a/coverage-7.10.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b8e1d2015d5dfdbf964ecef12944c0c8c55b885bb5c0467ae8ef55e0e151233", size = 216735, upload-time = "2025-08-17T00:25:08.617Z" }, + { url = "https://files.pythonhosted.org/packages/c5/e8/e2dcffea01921bfffc6170fb4406cffb763a3b43a047bbd7923566708193/coverage-7.10.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:25735c299439018d66eb2dccf54f625aceb78645687a05f9f848f6e6c751e169", size = 216982, upload-time = "2025-08-17T00:25:10.384Z" }, + { url = "https://files.pythonhosted.org/packages/9d/59/cc89bb6ac869704d2781c2f5f7957d07097c77da0e8fdd4fd50dbf2ac9c0/coverage-7.10.4-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:715c06cb5eceac4d9b7cdf783ce04aa495f6aff657543fea75c30215b28ddb74", size = 247981, upload-time = "2025-08-17T00:25:11.854Z" }, + { url = "https://files.pythonhosted.org/packages/aa/23/3da089aa177ceaf0d3f96754ebc1318597822e6387560914cc480086e730/coverage-7.10.4-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e017ac69fac9aacd7df6dc464c05833e834dc5b00c914d7af9a5249fcccf07ef", size = 250584, upload-time = "2025-08-17T00:25:13.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/ad/82/e8693c368535b4e5fad05252a366a1794d481c79ae0333ed943472fd778d/coverage-7.10.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bad180cc40b3fccb0f0e8c702d781492654ac2580d468e3ffc8065e38c6c2408", size = 251856, upload-time = "2025-08-17T00:25:15.27Z" }, + { url = "https://files.pythonhosted.org/packages/56/19/8b9cb13292e602fa4135b10a26ac4ce169a7fc7c285ff08bedd42ff6acca/coverage-7.10.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:becbdcd14f685fada010a5f792bf0895675ecf7481304fe159f0cd3f289550bd", size = 250015, upload-time = "2025-08-17T00:25:16.759Z" }, + { url = "https://files.pythonhosted.org/packages/10/e7/e5903990ce089527cf1c4f88b702985bd65c61ac245923f1ff1257dbcc02/coverage-7.10.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0b485ca21e16a76f68060911f97ebbe3e0d891da1dbbce6af7ca1ab3f98b9097", size = 247908, upload-time = "2025-08-17T00:25:18.232Z" }, + { url = "https://files.pythonhosted.org/packages/dd/c9/7d464f116df1df7fe340669af1ddbe1a371fc60f3082ff3dc837c4f1f2ab/coverage-7.10.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6c1d098ccfe8e1e0a1ed9a0249138899948afd2978cbf48eb1cc3fcd38469690", size = 249525, upload-time = "2025-08-17T00:25:20.141Z" }, + { url = "https://files.pythonhosted.org/packages/ce/42/722e0cdbf6c19e7235c2020837d4e00f3b07820fd012201a983238cc3a30/coverage-7.10.4-cp313-cp313-win32.whl", hash = "sha256:8630f8af2ca84b5c367c3df907b1706621abe06d6929f5045fd628968d421e6e", size = 219173, upload-time = "2025-08-17T00:25:21.56Z" }, + { url = "https://files.pythonhosted.org/packages/97/7e/aa70366f8275955cd51fa1ed52a521c7fcebcc0fc279f53c8c1ee6006dfe/coverage-7.10.4-cp313-cp313-win_amd64.whl", hash = "sha256:f68835d31c421736be367d32f179e14ca932978293fe1b4c7a6a49b555dff5b2", size = 219969, upload-time = "2025-08-17T00:25:23.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/96/c39d92d5aad8fec28d4606556bfc92b6fee0ab51e4a548d9b49fb15a777c/coverage-7.10.4-cp313-cp313-win_arm64.whl", hash = "sha256:6eaa61ff6724ca7ebc5326d1fae062d85e19b38dd922d50903702e6078370ae7", size = 218601, upload-time = "2025-08-17T00:25:25.295Z" }, + { url = "https://files.pythonhosted.org/packages/79/13/34d549a6177bd80fa5db758cb6fd3057b7ad9296d8707d4ab7f480b0135f/coverage-7.10.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:702978108876bfb3d997604930b05fe769462cc3000150b0e607b7b444f2fd84", size = 217445, upload-time = "2025-08-17T00:25:27.129Z" }, + { url = "https://files.pythonhosted.org/packages/6a/c0/433da866359bf39bf595f46d134ff2d6b4293aeea7f3328b6898733b0633/coverage-7.10.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e8f978e8c5521d9c8f2086ac60d931d583fab0a16f382f6eb89453fe998e2484", size = 217676, upload-time = "2025-08-17T00:25:28.641Z" }, + { url = "https://files.pythonhosted.org/packages/7e/d7/2b99aa8737f7801fd95222c79a4ebc8c5dd4460d4bed7ef26b17a60c8d74/coverage-7.10.4-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:df0ac2ccfd19351411c45e43ab60932b74472e4648b0a9edf6a3b58846e246a9", size = 259002, upload-time = "2025-08-17T00:25:30.065Z" }, + { url = "https://files.pythonhosted.org/packages/08/cf/86432b69d57debaef5abf19aae661ba8f4fcd2882fa762e14added4bd334/coverage-7.10.4-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:73a0d1aaaa3796179f336448e1576a3de6fc95ff4f07c2d7251d4caf5d18cf8d", size = 261178, upload-time = "2025-08-17T00:25:31.517Z" }, + { url = "https://files.pythonhosted.org/packages/23/78/85176593f4aa6e869cbed7a8098da3448a50e3fac5cb2ecba57729a5220d/coverage-7.10.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:873da6d0ed6b3ffc0bc01f2c7e3ad7e2023751c0d8d86c26fe7322c314b031dc", size = 263402, upload-time = "2025-08-17T00:25:33.339Z" }, + { url = 
"https://files.pythonhosted.org/packages/88/1d/57a27b6789b79abcac0cc5805b31320d7a97fa20f728a6a7c562db9a3733/coverage-7.10.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c6446c75b0e7dda5daa876a1c87b480b2b52affb972fedd6c22edf1aaf2e00ec", size = 260957, upload-time = "2025-08-17T00:25:34.795Z" }, + { url = "https://files.pythonhosted.org/packages/fa/e5/3e5ddfd42835c6def6cd5b2bdb3348da2e34c08d9c1211e91a49e9fd709d/coverage-7.10.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:6e73933e296634e520390c44758d553d3b573b321608118363e52113790633b9", size = 258718, upload-time = "2025-08-17T00:25:36.259Z" }, + { url = "https://files.pythonhosted.org/packages/1a/0b/d364f0f7ef111615dc4e05a6ed02cac7b6f2ac169884aa57faeae9eb5fa0/coverage-7.10.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52073d4b08d2cb571234c8a71eb32af3c6923149cf644a51d5957ac128cf6aa4", size = 259848, upload-time = "2025-08-17T00:25:37.754Z" }, + { url = "https://files.pythonhosted.org/packages/10/c6/bbea60a3b309621162e53faf7fac740daaf083048ea22077418e1ecaba3f/coverage-7.10.4-cp313-cp313t-win32.whl", hash = "sha256:e24afb178f21f9ceb1aefbc73eb524769aa9b504a42b26857243f881af56880c", size = 219833, upload-time = "2025-08-17T00:25:39.252Z" }, + { url = "https://files.pythonhosted.org/packages/44/a5/f9f080d49cfb117ddffe672f21eab41bd23a46179a907820743afac7c021/coverage-7.10.4-cp313-cp313t-win_amd64.whl", hash = "sha256:be04507ff1ad206f4be3d156a674e3fb84bbb751ea1b23b142979ac9eebaa15f", size = 220897, upload-time = "2025-08-17T00:25:40.772Z" }, + { url = "https://files.pythonhosted.org/packages/46/89/49a3fc784fa73d707f603e586d84a18c2e7796707044e9d73d13260930b7/coverage-7.10.4-cp313-cp313t-win_arm64.whl", hash = "sha256:f3e3ff3f69d02b5dad67a6eac68cc9c71ae343b6328aae96e914f9f2f23a22e2", size = 219160, upload-time = "2025-08-17T00:25:42.229Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/22/525f84b4cbcff66024d29f6909d7ecde97223f998116d3677cfba0d115b5/coverage-7.10.4-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a59fe0af7dd7211ba595cf7e2867458381f7e5d7b4cffe46274e0b2f5b9f4eb4", size = 216717, upload-time = "2025-08-17T00:25:43.875Z" }, + { url = "https://files.pythonhosted.org/packages/a6/58/213577f77efe44333a416d4bcb251471e7f64b19b5886bb515561b5ce389/coverage-7.10.4-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:3a6c35c5b70f569ee38dc3350cd14fdd0347a8b389a18bb37538cc43e6f730e6", size = 216994, upload-time = "2025-08-17T00:25:45.405Z" }, + { url = "https://files.pythonhosted.org/packages/17/85/34ac02d0985a09472f41b609a1d7babc32df87c726c7612dc93d30679b5a/coverage-7.10.4-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:acb7baf49f513554c4af6ef8e2bd6e8ac74e6ea0c7386df8b3eb586d82ccccc4", size = 248038, upload-time = "2025-08-17T00:25:46.981Z" }, + { url = "https://files.pythonhosted.org/packages/47/4f/2140305ec93642fdaf988f139813629cbb6d8efa661b30a04b6f7c67c31e/coverage-7.10.4-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a89afecec1ed12ac13ed203238b560cbfad3522bae37d91c102e690b8b1dc46c", size = 250575, upload-time = "2025-08-17T00:25:48.613Z" }, + { url = "https://files.pythonhosted.org/packages/f2/b5/41b5784180b82a083c76aeba8f2c72ea1cb789e5382157b7dc852832aea2/coverage-7.10.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:480442727f464407d8ade6e677b7f21f3b96a9838ab541b9a28ce9e44123c14e", size = 251927, upload-time = "2025-08-17T00:25:50.881Z" }, + { url = "https://files.pythonhosted.org/packages/78/ca/c1dd063e50b71f5aea2ebb27a1c404e7b5ecf5714c8b5301f20e4e8831ac/coverage-7.10.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a89bf193707f4a17f1ed461504031074d87f035153239f16ce86dfb8f8c7ac76", size = 249930, upload-time = "2025-08-17T00:25:52.422Z" }, + { url 
= "https://files.pythonhosted.org/packages/8d/66/d8907408612ffee100d731798e6090aedb3ba766ecf929df296c1a7ee4fb/coverage-7.10.4-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:3ddd912c2fc440f0fb3229e764feec85669d5d80a988ff1b336a27d73f63c818", size = 247862, upload-time = "2025-08-17T00:25:54.316Z" }, + { url = "https://files.pythonhosted.org/packages/29/db/53cd8ec8b1c9c52d8e22a25434785bfc2d1e70c0cfb4d278a1326c87f741/coverage-7.10.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a538944ee3a42265e61c7298aeba9ea43f31c01271cf028f437a7b4075592cf", size = 249360, upload-time = "2025-08-17T00:25:55.833Z" }, + { url = "https://files.pythonhosted.org/packages/4f/75/5ec0a28ae4a0804124ea5a5becd2b0fa3adf30967ac656711fb5cdf67c60/coverage-7.10.4-cp314-cp314-win32.whl", hash = "sha256:fd2e6002be1c62476eb862b8514b1ba7e7684c50165f2a8d389e77da6c9a2ebd", size = 219449, upload-time = "2025-08-17T00:25:57.984Z" }, + { url = "https://files.pythonhosted.org/packages/9d/ab/66e2ee085ec60672bf5250f11101ad8143b81f24989e8c0e575d16bb1e53/coverage-7.10.4-cp314-cp314-win_amd64.whl", hash = "sha256:ec113277f2b5cf188d95fb66a65c7431f2b9192ee7e6ec9b72b30bbfb53c244a", size = 220246, upload-time = "2025-08-17T00:25:59.868Z" }, + { url = "https://files.pythonhosted.org/packages/37/3b/00b448d385f149143190846217797d730b973c3c0ec2045a7e0f5db3a7d0/coverage-7.10.4-cp314-cp314-win_arm64.whl", hash = "sha256:9744954bfd387796c6a091b50d55ca7cac3d08767795b5eec69ad0f7dbf12d38", size = 218825, upload-time = "2025-08-17T00:26:01.44Z" }, + { url = "https://files.pythonhosted.org/packages/ee/2e/55e20d3d1ce00b513efb6fd35f13899e1c6d4f76c6cbcc9851c7227cd469/coverage-7.10.4-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:5af4829904dda6aabb54a23879f0f4412094ba9ef153aaa464e3c1b1c9bc98e6", size = 217462, upload-time = "2025-08-17T00:26:03.014Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/b3/aab1260df5876f5921e2c57519e73a6f6eeacc0ae451e109d44ee747563e/coverage-7.10.4-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7bba5ed85e034831fac761ae506c0644d24fd5594727e174b5a73aff343a7508", size = 217675, upload-time = "2025-08-17T00:26:04.606Z" }, + { url = "https://files.pythonhosted.org/packages/67/23/1cfe2aa50c7026180989f0bfc242168ac7c8399ccc66eb816b171e0ab05e/coverage-7.10.4-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:d57d555b0719834b55ad35045de6cc80fc2b28e05adb6b03c98479f9553b387f", size = 259176, upload-time = "2025-08-17T00:26:06.159Z" }, + { url = "https://files.pythonhosted.org/packages/9d/72/5882b6aeed3f9de7fc4049874fd7d24213bf1d06882f5c754c8a682606ec/coverage-7.10.4-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ba62c51a72048bb1ea72db265e6bd8beaabf9809cd2125bbb5306c6ce105f214", size = 261341, upload-time = "2025-08-17T00:26:08.137Z" }, + { url = "https://files.pythonhosted.org/packages/1b/70/a0c76e3087596ae155f8e71a49c2c534c58b92aeacaf4d9d0cbbf2dde53b/coverage-7.10.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0acf0c62a6095f07e9db4ec365cc58c0ef5babb757e54745a1aa2ea2a2564af1", size = 263600, upload-time = "2025-08-17T00:26:11.045Z" }, + { url = "https://files.pythonhosted.org/packages/cb/5f/27e4cd4505b9a3c05257fb7fc509acbc778c830c450cb4ace00bf2b7bda7/coverage-7.10.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e1033bf0f763f5cf49ffe6594314b11027dcc1073ac590b415ea93463466deec", size = 261036, upload-time = "2025-08-17T00:26:12.693Z" }, + { url = "https://files.pythonhosted.org/packages/02/d6/cf2ae3a7f90ab226ea765a104c4e76c5126f73c93a92eaea41e1dc6a1892/coverage-7.10.4-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:92c29eff894832b6a40da1789b1f252305af921750b03ee4535919db9179453d", size = 258794, upload-time = "2025-08-17T00:26:14.261Z" }, + 
{ url = "https://files.pythonhosted.org/packages/9e/b1/39f222eab0d78aa2001cdb7852aa1140bba632db23a5cfd832218b496d6c/coverage-7.10.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:822c4c830989c2093527e92acd97be4638a44eb042b1bdc0e7a278d84a070bd3", size = 259946, upload-time = "2025-08-17T00:26:15.899Z" }, + { url = "https://files.pythonhosted.org/packages/74/b2/49d82acefe2fe7c777436a3097f928c7242a842538b190f66aac01f29321/coverage-7.10.4-cp314-cp314t-win32.whl", hash = "sha256:e694d855dac2e7cf194ba33653e4ba7aad7267a802a7b3fc4347d0517d5d65cd", size = 220226, upload-time = "2025-08-17T00:26:17.566Z" }, + { url = "https://files.pythonhosted.org/packages/06/b0/afb942b6b2fc30bdbc7b05b087beae11c2b0daaa08e160586cf012b6ad70/coverage-7.10.4-cp314-cp314t-win_amd64.whl", hash = "sha256:efcc54b38ef7d5bfa98050f220b415bc5bb3d432bd6350a861cf6da0ede2cdcd", size = 221346, upload-time = "2025-08-17T00:26:19.311Z" }, + { url = "https://files.pythonhosted.org/packages/d8/66/e0531c9d1525cb6eac5b5733c76f27f3053ee92665f83f8899516fea6e76/coverage-7.10.4-cp314-cp314t-win_arm64.whl", hash = "sha256:6f3a3496c0fa26bfac4ebc458747b778cff201c8ae94fa05e1391bab0dbc473c", size = 219368, upload-time = "2025-08-17T00:26:21.011Z" }, + { url = "https://files.pythonhosted.org/packages/bb/78/983efd23200921d9edb6bd40512e1aa04af553d7d5a171e50f9b2b45d109/coverage-7.10.4-py3-none-any.whl", hash = "sha256:065d75447228d05121e5c938ca8f0e91eed60a1eb2d1258d42d5084fecfc3302", size = 208365, upload-time = "2025-08-17T00:26:41.479Z" }, ] [[package]] @@ -688,53 +963,124 @@ wheels = [ [[package]] name = "cryptography" -version = "42.0.8" +version = "46.0.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/93/a7/1498799a2ea06148463a9a2c10ab2f6a921a74fb19e231b27dc412a748e2/cryptography-42.0.8.tar.gz", hash = 
"sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2", size = 671250, upload-time = "2024-06-04T19:55:08.609Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/8b/1b929ba8139430e09e140e6939c2b29c18df1f2fc2149e41bdbdcdaf5d1f/cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e", size = 5899961, upload-time = "2024-06-04T19:53:57.933Z" }, - { url = "https://files.pythonhosted.org/packages/fa/5d/31d833daa800e4fab33209843095df7adb4a78ea536929145534cbc15026/cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d", size = 3114353, upload-time = "2024-06-04T19:54:12.171Z" }, - { url = "https://files.pythonhosted.org/packages/5d/32/f6326c70a9f0f258a201d3b2632bca586ea24d214cec3cf36e374040e273/cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902", size = 3647773, upload-time = "2024-06-04T19:54:07.051Z" }, - { url = "https://files.pythonhosted.org/packages/35/66/2d87e9ca95c82c7ee5f2c09716fc4c4242c1ae6647b9bd27e55e920e9f10/cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801", size = 3839763, upload-time = "2024-06-04T19:54:30.383Z" }, - { url = "https://files.pythonhosted.org/packages/c2/de/8083fa2e68d403553a01a9323f4f8b9d7ffed09928ba25635c29fb28c1e7/cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949", size = 3632661, upload-time = "2024-06-04T19:54:32.955Z" }, - { url = "https://files.pythonhosted.org/packages/07/40/d6f6819c62e808ea74639c3c640f7edd636b86cce62cb14943996a15df92/cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = 
"sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9", size = 3851536, upload-time = "2024-06-04T19:53:53.131Z" }, - { url = "https://files.pythonhosted.org/packages/5c/46/de71d48abf2b6d3c808f4fbb0f4dc44a4e72786be23df0541aa2a3f6fd7e/cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583", size = 3754209, upload-time = "2024-06-04T19:54:55.259Z" }, - { url = "https://files.pythonhosted.org/packages/25/c9/86f04e150c5d5d5e4a731a2c1e0e43da84d901f388e3fea3d5de98d689a7/cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7", size = 3923551, upload-time = "2024-06-04T19:54:16.46Z" }, - { url = "https://files.pythonhosted.org/packages/53/c2/903014dafb7271fb148887d4355b2e90319cad6e810663be622b0c933fc9/cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b", size = 3739265, upload-time = "2024-06-04T19:54:23.194Z" }, - { url = "https://files.pythonhosted.org/packages/95/26/82d704d988a193cbdc69ac3b41c687c36eaed1642cce52530ad810c35645/cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7", size = 3937371, upload-time = "2024-06-04T19:55:04.303Z" }, - { url = "https://files.pythonhosted.org/packages/cf/71/4e0d05c9acd638a225f57fb6162aa3d03613c11b76893c23ea4675bb28c5/cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2", size = 2438849, upload-time = "2024-06-04T19:54:27.39Z" }, - { url = "https://files.pythonhosted.org/packages/06/0f/78da3cad74f2ba6c45321dc90394d70420ea846730dc042ef527f5a224b5/cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba", size = 2889090, upload-time = 
"2024-06-04T19:54:14.245Z" }, - { url = "https://files.pythonhosted.org/packages/60/12/f064af29190cdb1d38fe07f3db6126091639e1dece7ec77c4ff037d49193/cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28", size = 5901232, upload-time = "2024-06-04T19:54:52.722Z" }, - { url = "https://files.pythonhosted.org/packages/43/c2/4a3eef67e009a522711ebd8ac89424c3a7fe591ece7035d964419ad52a1d/cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e", size = 3648711, upload-time = "2024-06-04T19:54:44.323Z" }, - { url = "https://files.pythonhosted.org/packages/49/1c/9f6d13cc8041c05eebff1154e4e71bedd1db8e174fff999054435994187a/cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70", size = 3841968, upload-time = "2024-06-04T19:54:57.911Z" }, - { url = "https://files.pythonhosted.org/packages/5f/f9/c3d4f19b82bdb25a3d857fe96e7e571c981810e47e3f299cc13ac429066a/cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c", size = 3633032, upload-time = "2024-06-04T19:54:48.518Z" }, - { url = "https://files.pythonhosted.org/packages/fa/e2/b7e6e8c261536c489d9cf908769880d94bd5d9a187e166b0dc838d2e6a56/cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7", size = 3852478, upload-time = "2024-06-04T19:54:50.599Z" }, - { url = "https://files.pythonhosted.org/packages/a2/68/e16751f6b859bc120f53fddbf3ebada5c34f0e9689d8af32884d8b2e4b4c/cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e", size = 3754102, upload-time = "2024-06-04T19:54:46.231Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/38/85c74d0ac4c540780e072b1e6f148ecb718418c1062edcb20d22f3ec5bbb/cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961", size = 3925042, upload-time = "2024-06-04T19:54:34.767Z" }, - { url = "https://files.pythonhosted.org/packages/89/f4/a8b982e88eb5350407ebdbf4717b55043271d878705329e107f4783555f2/cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1", size = 3738833, upload-time = "2024-06-04T19:54:05.231Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2b/be327b580645927bb1a1f32d5a175b897a9b956bc085b095e15c40bac9ed/cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14", size = 3938751, upload-time = "2024-06-04T19:54:37.837Z" }, - { url = "https://files.pythonhosted.org/packages/3c/d5/c6a78ffccdbe4516711ebaa9ed2c7eb6ac5dfa3dc920f2c7e920af2418b0/cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c", size = 2439281, upload-time = "2024-06-04T19:53:55.903Z" }, - { url = "https://files.pythonhosted.org/packages/a2/7b/b0d330852dd5953daee6b15f742f15d9f18e9c0154eb4cfcc8718f0436da/cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a", size = 2886038, upload-time = "2024-06-04T19:54:18.707Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/1d/42/9c391dd801d6cf0d561b5890549d4b27bafcc53b39c31a817e69d87c625b/cryptography-46.0.3-cp311-abi3-macosx_10_9_universal2.whl", hash = "sha256:109d4ddfadf17e8e7779c39f9b18111a09efb969a301a31e987416a0191ed93a", size = 7225004, upload-time = "2025-10-15T23:16:52.239Z" }, + { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" }, + { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" }, + { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" }, + { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" }, + { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" }, + { url = 
"https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" }, + { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" }, + { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" }, + { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" }, + { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" }, + { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" }, + { url = 
"https://files.pythonhosted.org/packages/96/92/8a6a9525893325fc057a01f654d7efc2c64b9de90413adcf605a85744ff4/cryptography-46.0.3-cp311-abi3-win32.whl", hash = "sha256:f260d0d41e9b4da1ed1e0f1ce571f97fe370b152ab18778e9e8f67d6af432018", size = 3055988, upload-time = "2025-10-15T23:17:14.65Z" }, + { url = "https://files.pythonhosted.org/packages/7e/bf/80fbf45253ea585a1e492a6a17efcb93467701fa79e71550a430c5e60df0/cryptography-46.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:a9a3008438615669153eb86b26b61e09993921ebdd75385ddd748702c5adfddb", size = 3514451, upload-time = "2025-10-15T23:17:16.142Z" }, + { url = "https://files.pythonhosted.org/packages/2e/af/9b302da4c87b0beb9db4e756386a7c6c5b8003cd0e742277888d352ae91d/cryptography-46.0.3-cp311-abi3-win_arm64.whl", hash = "sha256:5d7f93296ee28f68447397bf5198428c9aeeab45705a55d53a6343455dcb2c3c", size = 2928007, upload-time = "2025-10-15T23:17:18.04Z" }, + { url = "https://files.pythonhosted.org/packages/f5/e2/a510aa736755bffa9d2f75029c229111a1d02f8ecd5de03078f4c18d91a3/cryptography-46.0.3-cp314-cp314t-macosx_10_9_universal2.whl", hash = "sha256:00a5e7e87938e5ff9ff5447ab086a5706a957137e6e433841e9d24f38a065217", size = 7158012, upload-time = "2025-10-15T23:17:19.982Z" }, + { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" }, + { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" }, + { url = 
"https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" }, + { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" }, + { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" }, + { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" }, + { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" }, + { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" }, + { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" }, + { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" }, + { url = "https://files.pythonhosted.org/packages/fd/30/27654c1dbaf7e4a3531fa1fc77986d04aefa4d6d78259a62c9dc13d7ad36/cryptography-46.0.3-cp314-cp314t-win32.whl", hash = "sha256:8a6e050cb6164d3f830453754094c086ff2d0b2f3a897a1d9820f6139a1f0914", size = 3022339, upload-time = "2025-10-15T23:17:40.888Z" }, + { url = "https://files.pythonhosted.org/packages/f6/30/640f34ccd4d2a1bc88367b54b926b781b5a018d65f404d409aba76a84b1c/cryptography-46.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:760f83faa07f8b64e9c33fc963d790a2edb24efb479e3520c14a45741cd9b2db", size = 3494315, upload-time = "2025-10-15T23:17:42.769Z" }, + { url = "https://files.pythonhosted.org/packages/ba/8b/88cc7e3bd0a8e7b861f26981f7b820e1f46aa9d26cc482d0feba0ecb4919/cryptography-46.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:516ea134e703e9fe26bcd1277a4b59ad30586ea90c365a87781d7887a646fe21", size = 2919331, upload-time = "2025-10-15T23:17:44.468Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/23/45fe7f376a7df8daf6da3556603b36f53475a99ce4faacb6ba2cf3d82021/cryptography-46.0.3-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:cb3d760a6117f621261d662bccc8ef5bc32ca673e037c83fbe565324f5c46936", size = 7218248, upload-time = "2025-10-15T23:17:46.294Z" }, + { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" }, + { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" }, + { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" }, + { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" }, + { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" }, + { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" }, + { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" }, + { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" }, + { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" }, + { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/6e/1c8331ddf91ca4730ab3086a0f1be19c65510a33b5a441cb334e7a2d2560/cryptography-46.0.3-cp38-abi3-win32.whl", hash = "sha256:6276eb85ef938dc035d59b87c8a7dc559a232f954962520137529d77b18ff1df", size = 3036695, upload-time = "2025-10-15T23:18:08.672Z" }, + { url = "https://files.pythonhosted.org/packages/90/45/b0d691df20633eff80955a0fc7695ff9051ffce8b69741444bd9ed7bd0db/cryptography-46.0.3-cp38-abi3-win_amd64.whl", hash = "sha256:416260257577718c05135c55958b674000baef9a1c7d9e8f306ec60d71db850f", size = 3501720, upload-time = "2025-10-15T23:18:10.632Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cb/2da4cc83f5edb9c3257d09e1e7ab7b23f049c7962cae8d842bbef0a9cec9/cryptography-46.0.3-cp38-abi3-win_arm64.whl", hash = "sha256:d89c3468de4cdc4f08a57e214384d0471911a3830fcdaf7a8cc587e42a866372", size = 2918740, upload-time = "2025-10-15T23:18:12.277Z" }, +] + +[[package]] +name = "cuda-bindings" +version = "13.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-pathfinder" }, + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/85/b5/e90add0eb01d1ceaaae38c944c8a968090eb25dfbe3c81f5300e39c71739/cuda_bindings-13.0.1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a06268a4226c867a7234f12ca183e186e7962a4971b53983c8de182dd62878a3", size = 11929946, upload-time = "2025-08-18T15:29:36.485Z" }, + { url = "https://files.pythonhosted.org/packages/71/2e/51f77c396bb54128a63da74e299edf2c6c4c08ebfb15d48e43665b5fe3b3/cuda_bindings-13.0.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12dd61b782b1558ac3e3790a02e3d9dc4827c6702a3315a9b79b5e1f6bed30f2", size = 12302099, upload-time = "2025-08-18T15:29:38.756Z" }, + { url = "https://files.pythonhosted.org/packages/f0/84/e1ccf4e52d60da76ae538f86c6e73425ae1dc226b4a528893ea2012e0646/cuda_bindings-13.0.1-cp312-cp312-win_amd64.whl", 
hash = "sha256:06b4533e43c65bf2422db25eb86cd0813a818a2e3cb4b793f4afbdb2f801d894", size = 12046683, upload-time = "2025-08-18T15:29:41.267Z" }, + { url = "https://files.pythonhosted.org/packages/b5/8a/4112bc04f110a89d751b3d580189debe64240c94f6609b6d9e1bd16db16b/cuda_bindings-13.0.1-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a38090fa452f71c30b93b9007c26f1874c28abc5662b37c5caefbd41506a64f7", size = 11937804, upload-time = "2025-08-18T15:29:43.262Z" }, + { url = "https://files.pythonhosted.org/packages/53/43/c3aa3637458edd10014cf16a4152faca17d8fb6cc233fec23d469eb042aa/cuda_bindings-13.0.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4bdea32e9ede085c72dbc670d2c6ecd68451aa06d0f1cfe597374483dcdd1657", size = 12316943, upload-time = "2025-08-18T15:29:45.261Z" }, + { url = "https://files.pythonhosted.org/packages/ed/fd/033f669fdda93bab7c342d9e08b8bd97b9a3670bd8f5ee5dbc51054d54db/cuda_bindings-13.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:448bf908d17b29e3c5dfa55f848e37f3d4170e5c8644536323bf54a0785e6b98", size = 12009034, upload-time = "2025-08-18T15:29:47.343Z" }, +] + +[[package]] +name = "cuda-pathfinder" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/52/07/7978a4c4d8e70620170aa247ce16241a72d4cf6e4336bd3b296926baf7df/cuda_pathfinder-1.1.0-py3-none-any.whl", hash = "sha256:3e66fe0af8ead20eca25e077d2e0cb2dcc027d4297d550a74f99a0211e610799", size = 17673, upload-time = "2025-08-07T01:34:08.562Z" }, +] + +[[package]] +name = "cuda-python" +version = "13.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cuda-bindings" }, + { name = "cuda-pathfinder" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/02/02/078f4cba58349faad5597306ca54bf0bf129f8c713b261e1def59468a505/cuda_python-13.0.1-py3-none-any.whl", hash = 
"sha256:9d8c021953cfbb2c1916a3977c04ad23846cc8ac7647916cb6a1bf4f3280412c", size = 7611, upload-time = "2025-08-18T15:39:40.456Z" }, ] [[package]] name = "cupy-cuda12x" -version = "13.4.1" +version = "13.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "fastrlock", marker = "sys_platform != 'darwin'" }, { name = "numpy", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/6d/a5e08d225b1664b400fb4a87262878d315267c310b93d43efd5b7b0b1f64/cupy_cuda12x-13.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a714db3dae534b9d869951366ae2431f3e72036b07827927ffccd24076507ca8", size = 118354020, upload-time = "2025-03-21T07:25:10.378Z" }, - { url = "https://files.pythonhosted.org/packages/56/58/5bfc83265455ff783d5be65451392a6920a90fe8996a091006ba02512848/cupy_cuda12x-13.4.1-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:06103dd2dc2ff7f36c67d2d01cb658befd68da350fae78a0e113fbab6895755f", size = 105273045, upload-time = "2025-03-21T07:25:17.966Z" }, - { url = "https://files.pythonhosted.org/packages/6f/e9/abc5ae5d8f6e05fb44c83105f8663d46c1bdfc9d0039fbaf21e79f51a985/cupy_cuda12x-13.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:7d73a32b3b49311cf384f2dd9c686cc9244435b2288d628568af6a77262964ad", size = 82066008, upload-time = "2025-03-21T07:25:24.372Z" }, - { url = "https://files.pythonhosted.org/packages/cd/59/c5200651fc3c0e1e92393d4e582e7812d5f76f26607c1fb310399c335b21/cupy_cuda12x-13.4.1-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:43f97bedd6e2385f61b939ee37faadff0e1fa701d35f2a328cdc13d5b1b74b48", size = 117957759, upload-time = "2025-03-21T07:25:31.363Z" }, - { url = "https://files.pythonhosted.org/packages/13/33/de71853fcd28aaf961092d895d126bfe5ebecc56d89865ea41ad8e48e559/cupy_cuda12x-13.4.1-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:d0d153ac5b24ad183a7bcbe83693a6df06840355bf94b30c1606c519added468", size = 105047230, upload-time = "2025-03-21T07:25:38.084Z" 
}, - { url = "https://files.pythonhosted.org/packages/08/f6/38f02f85d6062868425180d9b36097bac05a3d222973be5b90aa3a8fd580/cupy_cuda12x-13.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:4ca400146ab1c5f65dad180bc2562b58b91e239b322d33689fafed7b6399e229", size = 82031139, upload-time = "2025-03-21T07:25:44.085Z" }, + { url = "https://files.pythonhosted.org/packages/12/c5/7e7fc4816d0de0154e5d9053242c3a08a0ca8b43ee656a6f7b3b95055a7b/cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a6970ceefe40f9acbede41d7fe17416bd277b1bd2093adcde457b23b578c5a59", size = 127334633, upload-time = "2025-08-18T08:24:43.065Z" }, + { url = "https://files.pythonhosted.org/packages/e0/95/d7e1295141e7d530674a3cc567e13ed0eb6b81524cb122d797ed996b5bea/cupy_cuda12x-13.6.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:79b0cacb5e8b190ef409f9e03f06ac8de1b021b0c0dda47674d446f5557e0eb1", size = 112886268, upload-time = "2025-08-18T08:24:49.294Z" }, + { url = "https://files.pythonhosted.org/packages/ae/8c/14555b63fd78cfac7b88af0094cea0a3cb845d243661ec7da69f7b3ea0de/cupy_cuda12x-13.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca06fede7b8b83ca9ad80062544ef2e5bb8d4762d1c4fc3ac8349376de9c8a5e", size = 89785108, upload-time = "2025-08-18T08:24:54.527Z" }, + { url = "https://files.pythonhosted.org/packages/19/ec/f62cb991f11fb41291c4c15b6936d7b67ffa71ddb344ad6e8894e06ce58d/cupy_cuda12x-13.6.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:e5426ae3b1b9cf59927481e457a89e3f0b50a35b114a8034ec9110e7a833434c", size = 126904601, upload-time = "2025-08-18T08:24:59.951Z" }, + { url = "https://files.pythonhosted.org/packages/f8/b8/30127bcdac53a25f94ee201bf4802fcd8d012145567d77c54174d6d01c01/cupy_cuda12x-13.6.0-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:52d9e7f83d920da7d81ec2e791c2c2c747fdaa1d7b811971b34865ce6371e98a", size = 112654824, upload-time = "2025-08-18T08:25:05.944Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/36/c9e24acb19f039f814faea880b3704a3661edaa6739456b73b27540663e3/cupy_cuda12x-13.6.0-cp313-cp313-win_amd64.whl", hash = "sha256:297b4268f839de67ef7865c2202d3f5a0fb8d20bd43360bc51b6e60cb4406447", size = 89750580, upload-time = "2025-08-18T08:25:10.972Z" }, +] + +[[package]] +name = "cut-cross-entropy" +version = "25.3.2" +source = { git = "https://github.com/apple/ml-cross-entropy.git?rev=87a86ab#87a86aba72cfd2f0d8abecaf81c13c4528ea07d8" } +dependencies = [ + { name = "setuptools" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "triton", version = "3.4.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux'" }, + { name = "triton", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, ] [[package]] @@ -748,15 +1094,15 @@ wheels = [ [[package]] name = "databricks-sdk" -version = "0.59.0" +version = "0.64.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "google-auth" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1c/d9/b48531b1b2caa3ed559ece34bf2abff2536048bf88447592621daeaec5d5/databricks_sdk-0.59.0.tar.gz", hash = "sha256:f60a27f00ccdf57d8496dd4a2e46ad17bb9557add09a6b2e23d46f29c0bca613", size = 719165, upload-time = "2025-07-17T11:13:57.847Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/31/18a655a4382851c897a84c94e547e3a8e1a0f2b51e4ee74227c982a53943/databricks_sdk-0.64.0.tar.gz", hash = "sha256:e21cce45bb4f1254ad5d22ea77fc30484378beb54b5b42db098d1f975c813e81", size = 746326, upload-time = "2025-08-20T11:47:22.469Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/1b/ac/1d97e438f86c26314227f7b2f0711476db79522a137b60533c5181ae481b/databricks_sdk-0.59.0-py3-none-any.whl", hash = "sha256:2ae4baefd1f7360c8314e2ebdc0a0a6d7e76a88805a65d0415ff73631c1e4c0d", size = 676213, upload-time = "2025-07-17T11:13:56.088Z" }, + { url = "https://files.pythonhosted.org/packages/21/70/734d3b559e72c4231531c77685f204d8c14202ada640c4f16229a6456b57/databricks_sdk-0.64.0-py3-none-any.whl", hash = "sha256:3efb2a739deda3186d0380ad6ced7d4811ced7adcaf61cbf0f897eab52974a17", size = 703407, upload-time = "2025-08-20T11:47:20.509Z" }, ] [[package]] @@ -785,53 +1131,89 @@ wheels = [ [[package]] name = "debugpy" -version = "1.8.14" +version = "1.8.16" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bd/75/087fe07d40f490a78782ff3b0a30e3968936854105487decdb33446d4b0e/debugpy-1.8.14.tar.gz", hash = "sha256:7cd287184318416850aa8b60ac90105837bb1e59531898c07569d197d2ed5322", size = 1641444, upload-time = "2025-04-10T19:46:10.981Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/d4/722d0bcc7986172ac2ef3c979ad56a1030e3afd44ced136d45f8142b1f4a/debugpy-1.8.16.tar.gz", hash = "sha256:31e69a1feb1cf6b51efbed3f6c9b0ef03bc46ff050679c4be7ea6d2e23540870", size = 1643809, upload-time = "2025-08-06T18:00:02.647Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/2a/ac2df0eda4898f29c46eb6713a5148e6f8b2b389c8ec9e425a4a1d67bf07/debugpy-1.8.14-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:8899c17920d089cfa23e6005ad9f22582fd86f144b23acb9feeda59e84405b84", size = 2501268, upload-time = "2025-04-10T19:46:26.044Z" }, - { url = "https://files.pythonhosted.org/packages/10/53/0a0cb5d79dd9f7039169f8bf94a144ad3efa52cc519940b3b7dde23bcb89/debugpy-1.8.14-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6bb5c0dcf80ad5dbc7b7d6eac484e2af34bdacdf81df09b6a3e62792b722826", size = 4221077, 
upload-time = "2025-04-10T19:46:27.464Z" }, - { url = "https://files.pythonhosted.org/packages/f8/d5/84e01821f362327bf4828728aa31e907a2eca7c78cd7c6ec062780d249f8/debugpy-1.8.14-cp312-cp312-win32.whl", hash = "sha256:281d44d248a0e1791ad0eafdbbd2912ff0de9eec48022a5bfbc332957487ed3f", size = 5255127, upload-time = "2025-04-10T19:46:29.467Z" }, - { url = "https://files.pythonhosted.org/packages/33/16/1ed929d812c758295cac7f9cf3dab5c73439c83d9091f2d91871e648093e/debugpy-1.8.14-cp312-cp312-win_amd64.whl", hash = "sha256:5aa56ef8538893e4502a7d79047fe39b1dae08d9ae257074c6464a7b290b806f", size = 5297249, upload-time = "2025-04-10T19:46:31.538Z" }, - { url = "https://files.pythonhosted.org/packages/4d/e4/395c792b243f2367d84202dc33689aa3d910fb9826a7491ba20fc9e261f5/debugpy-1.8.14-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:329a15d0660ee09fec6786acdb6e0443d595f64f5d096fc3e3ccf09a4259033f", size = 2485676, upload-time = "2025-04-10T19:46:32.96Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f1/6f2ee3f991327ad9e4c2f8b82611a467052a0fb0e247390192580e89f7ff/debugpy-1.8.14-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f920c7f9af409d90f5fd26e313e119d908b0dd2952c2393cd3247a462331f15", size = 4217514, upload-time = "2025-04-10T19:46:34.336Z" }, - { url = "https://files.pythonhosted.org/packages/79/28/b9d146f8f2dc535c236ee09ad3e5ac899adb39d7a19b49f03ac95d216beb/debugpy-1.8.14-cp313-cp313-win32.whl", hash = "sha256:3784ec6e8600c66cbdd4ca2726c72d8ca781e94bce2f396cc606d458146f8f4e", size = 5254756, upload-time = "2025-04-10T19:46:36.199Z" }, - { url = "https://files.pythonhosted.org/packages/e0/62/a7b4a57013eac4ccaef6977966e6bec5c63906dd25a86e35f155952e29a1/debugpy-1.8.14-cp313-cp313-win_amd64.whl", hash = "sha256:684eaf43c95a3ec39a96f1f5195a7ff3d4144e4a18d69bb66beeb1a6de605d6e", size = 5297119, upload-time = "2025-04-10T19:46:38.141Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/1a/481f33c37ee3ac8040d3d51fc4c4e4e7e61cb08b8bc8971d6032acc2279f/debugpy-1.8.14-py2.py3-none-any.whl", hash = "sha256:5cd9a579d553b6cb9759a7908a41988ee6280b961f24f63336835d9418216a20", size = 5256230, upload-time = "2025-04-10T19:46:54.077Z" }, + { url = "https://files.pythonhosted.org/packages/61/fb/0387c0e108d842c902801bc65ccc53e5b91d8c169702a9bbf4f7efcedf0c/debugpy-1.8.16-cp312-cp312-macosx_14_0_universal2.whl", hash = "sha256:b202e2843e32e80b3b584bcebfe0e65e0392920dc70df11b2bfe1afcb7a085e4", size = 2511822, upload-time = "2025-08-06T18:00:18.526Z" }, + { url = "https://files.pythonhosted.org/packages/37/44/19e02745cae22bf96440141f94e15a69a1afaa3a64ddfc38004668fcdebf/debugpy-1.8.16-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64473c4a306ba11a99fe0bb14622ba4fbd943eb004847d9b69b107bde45aa9ea", size = 4230135, upload-time = "2025-08-06T18:00:19.997Z" }, + { url = "https://files.pythonhosted.org/packages/f3/0b/19b1ba5ee4412f303475a2c7ad5858efb99c90eae5ec627aa6275c439957/debugpy-1.8.16-cp312-cp312-win32.whl", hash = "sha256:833a61ed446426e38b0dd8be3e9d45ae285d424f5bf6cd5b2b559c8f12305508", size = 5281271, upload-time = "2025-08-06T18:00:21.281Z" }, + { url = "https://files.pythonhosted.org/packages/b1/e0/bc62e2dc141de53bd03e2c7cb9d7011de2e65e8bdcdaa26703e4d28656ba/debugpy-1.8.16-cp312-cp312-win_amd64.whl", hash = "sha256:75f204684581e9ef3dc2f67687c3c8c183fde2d6675ab131d94084baf8084121", size = 5323149, upload-time = "2025-08-06T18:00:23.033Z" }, + { url = "https://files.pythonhosted.org/packages/62/66/607ab45cc79e60624df386e233ab64a6d8d39ea02e7f80e19c1d451345bb/debugpy-1.8.16-cp313-cp313-macosx_14_0_universal2.whl", hash = "sha256:85df3adb1de5258dca910ae0bb185e48c98801ec15018a263a92bb06be1c8787", size = 2496157, upload-time = "2025-08-06T18:00:24.361Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/a0/c95baae08a75bceabb79868d663a0736655e427ab9c81fb848da29edaeac/debugpy-1.8.16-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee89e948bc236a5c43c4214ac62d28b29388453f5fd328d739035e205365f0b", size = 4222491, upload-time = "2025-08-06T18:00:25.806Z" }, + { url = "https://files.pythonhosted.org/packages/5b/2f/1c8db6ddd8a257c3cd2c46413b267f1d5fa3df910401c899513ce30392d6/debugpy-1.8.16-cp313-cp313-win32.whl", hash = "sha256:cf358066650439847ec5ff3dae1da98b5461ea5da0173d93d5e10f477c94609a", size = 5281126, upload-time = "2025-08-06T18:00:27.207Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ba/c3e154ab307366d6c5a9c1b68de04914e2ce7fa2f50d578311d8cc5074b2/debugpy-1.8.16-cp313-cp313-win_amd64.whl", hash = "sha256:b5aea1083f6f50023e8509399d7dc6535a351cc9f2e8827d1e093175e4d9fa4c", size = 5323094, upload-time = "2025-08-06T18:00:29.03Z" }, + { url = "https://files.pythonhosted.org/packages/52/57/ecc9ae29fa5b2d90107cd1d9bf8ed19aacb74b2264d986ae9d44fe9bdf87/debugpy-1.8.16-py2.py3-none-any.whl", hash = "sha256:19c9521962475b87da6f673514f7fd610328757ec993bf7ec0d8c96f9a325f9e", size = 5287700, upload-time = "2025-08-06T18:00:42.333Z" }, ] [[package]] -name = "decorator" -version = "5.2.1" +name = "decord" +version = "0.6.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } +dependencies = [ + { name = "numpy" }, +] wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, 
upload-time = "2025-02-24T04:41:32.565Z" }, + { url = "https://files.pythonhosted.org/packages/11/79/936af42edf90a7bd4e41a6cac89c913d4b47fa48a26b042d5129a9242ee3/decord-0.6.0-py3-none-manylinux2010_x86_64.whl", hash = "sha256:51997f20be8958e23b7c4061ba45d0efcd86bffd5fe81c695d0befee0d442976", size = 13602299, upload-time = "2021-06-14T21:30:55.486Z" }, + { url = "https://files.pythonhosted.org/packages/6c/be/e15b5b866da452e62635a7b27513f31cb581fa2ea9cc9b768b535d62a955/decord-0.6.0-py3-none-win_amd64.whl", hash = "sha256:02665d7c4f1193a330205a791bc128f7e108eb6ae5b67144437a02f700943bad", size = 24733380, upload-time = "2021-06-14T21:30:57.766Z" }, ] [[package]] -name = "deprecated" -version = "1.2.18" -source = { registry = "https://pypi.org/simple" } +name = "deep-ep" +version = "1.1.0+e3908bf" +source = { git = "https://github.com/deepseek-ai/DeepEP.git?rev=e3908bf5bd0cc6265bcb225d15cd8c996d4759ef#e3908bf5bd0cc6265bcb225d15cd8c996d4759ef" } dependencies = [ - { name = "wrapt" }, + { name = "ninja" }, + { name = "packaging" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] + +[[package]] +name = "deep-gemm" +version = "2.0.0+7b6b556" +source = { git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c#7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c" } +dependencies = [ + { name = "ninja" }, + { name = "packaging" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } + +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] [[package]] name = "depyf" -version = "0.18.0" +version = "0.19.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "astor" }, { name = "dill" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/ee/43a4cbba615abfc1eb2e5ff5eed3f80f38d58645b4d13d0ea06b9ca1909d/depyf-0.18.0.tar.gz", hash = "sha256:b99f0c383be949ae45d5d606fe444c71f375b55a57b8d6b20e7856670d52130d", size = 43050, upload-time = "2024-12-07T00:42:40.198Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/38/69157d711be575f1b9cf3177b64ef4ade44373fc02839f183fdd98ec2dd6/depyf-0.19.0.tar.gz", hash = "sha256:afed0916b32d141cc90fa6220df01885eda442ca43b297d5050eeb90b4a5cb44", size = 6171405, upload-time = 
"2025-04-20T08:07:41.224Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/4d/1192acbcdc5e843f5e5d51f6e8788f2b60a9fe0b578ac385ded67a0b0b26/depyf-0.19.0-py3-none-any.whl", hash = "sha256:040b35fc0997d49df024b7d094f2a7836f91e9ed02f49982dd37e70aa3285ad5", size = 39034, upload-time = "2025-04-20T08:07:37.036Z" }, +] + +[[package]] +name = "devtools" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/84/75/b78198620640d394bc435c17bb49db18419afdd6cfa3ed8bcfe14034ec80/devtools-0.12.2.tar.gz", hash = "sha256:efceab184cb35e3a11fa8e602cc4fadacaa2e859e920fc6f87bf130b69885507", size = 75005, upload-time = "2023-09-03T16:57:00.679Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/d8/efc291d5c69a9905515055d23977643dd0d482ebfeb0dbabef1947ee75d8/depyf-0.18.0-py3-none-any.whl", hash = "sha256:007294d5bac19a38a0767d747be0f49b9ffdcea0394a822644142df22b33a3e1", size = 38839, upload-time = "2024-12-07T00:42:38.83Z" }, + { url = "https://files.pythonhosted.org/packages/d1/ae/afb1487556e2dc827a17097aac8158a25b433a345386f0e249f6d2694ccb/devtools-0.12.2-py3-none-any.whl", hash = "sha256:c366e3de1df4cdd635f1ad8cbcd3af01a384d7abda71900e68d43b04eb6aaca7", size = 19411, upload-time = "2023-09-03T16:56:59.049Z" }, ] [[package]] @@ -854,11 +1236,11 @@ wheels = [ [[package]] name = "distlib" -version = "0.3.9" +version = "0.4.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/96/8e/709914eb2b5749865801041647dc7f4e6d00b549cfe88b65ca192995f07c/distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d", size = 614605, upload-time = "2025-07-17T16:52:00.465Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, + { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] [[package]] @@ -894,13 +1276,10 @@ wheels = [ ] [[package]] -name = "docstring-parser" -version = "0.16" +name = "docopt" +version = "0.6.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/08/12/9c22a58c0b1e29271051222d8906257616da84135af9ed167c9e28f85cb3/docstring_parser-0.16.tar.gz", hash = "sha256:538beabd0af1e2db0146b6bd3caa526c35a34d61af9fd2887f3a8a27a739aa6e", size = 26565, upload-time = "2024-03-15T10:39:44.419Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d5/7c/e9fcff7623954d86bdc17782036cbf715ecab1bec4847c008557affe1ca8/docstring_parser-0.16-py3-none-any.whl", hash = "sha256:bf0a1387354d3691d102edef7ec124f219ef639982d096e26e3b60aeffa90637", size = 36533, upload-time = "2024-03-15T10:39:41.527Z" }, -] +sdist = { url = "https://files.pythonhosted.org/packages/a2/55/8f8cab2afd404cf578136ef2cc5dfb50baa1761b68c9da1fb1e4eed343c9/docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491", size = 25901, upload-time = "2014-06-16T11:18:57.406Z" } [[package]] name = "docutils" @@ 
-946,32 +1325,26 @@ wheels = [ ] [[package]] -name = "fabric" -version = "3.2.2" +name = "executing" +version = "2.2.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "decorator" }, - { name = "deprecated" }, - { name = "invoke" }, - { name = "paramiko" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0d/3f/337f278b70ba339c618a490f6b8033b7006c583bd197a897f12fbc468c51/fabric-3.2.2.tar.gz", hash = "sha256:8783ca42e3b0076f08b26901aac6b9d9b1f19c410074e7accfab902c184ff4a3", size = 183215, upload-time = "2023-08-31T01:42:05.55Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cc/28/c14e053b6762b1044f34a13aab6859bbf40456d37d23aa286ac24cfd9a5d/executing-2.2.1.tar.gz", hash = "sha256:3632cc370565f6648cc328b32435bd120a1e4ebb20c77e3fdde9a13cd1e533c4", size = 1129488, upload-time = "2025-09-01T09:48:10.866Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/1f/e99e23ee01847147fa194e8d41cfcf2535a2dbfcb51414c541cadb15c5d7/fabric-3.2.2-py3-none-any.whl", hash = "sha256:91c47c0be68b14936c88b34da8a1f55e5710fd28397dac5d4ff2e21558113a6f", size = 59417, upload-time = "2023-08-31T01:42:03.917Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ea/53f2148663b321f21b5a606bd5f191517cf40b7072c0497d3c92c4a13b1e/executing-2.2.1-py2.py3-none-any.whl", hash = "sha256:760643d3452b4d777d295bb167ccc74c64a81df23fb5e08eff250c425a4b2017", size = 28317, upload-time = "2025-09-01T09:48:08.5Z" }, ] [[package]] name = "fastapi" -version = "0.115.13" +version = "0.116.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pydantic" }, { name = "starlette" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/20/64/ec0788201b5554e2a87c49af26b77a4d132f807a0fa9675257ac92c6aa0e/fastapi-0.115.13.tar.gz", hash = "sha256:55d1d25c2e1e0a0a50aceb1c8705cd932def273c102bff0b1c1da88b3c6eb307", size = 295680, upload-time = "2025-06-17T11:49:45.575Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/78/d7/6c8b3bfe33eeffa208183ec037fee0cce9f7f024089ab1c5d12ef04bd27c/fastapi-0.116.1.tar.gz", hash = "sha256:ed52cbf946abfd70c5a0dccb24673f0670deeb517a88b3544d03c2a6bf283143", size = 296485, upload-time = "2025-07-11T16:22:32.057Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/59/4a/e17764385382062b0edbb35a26b7cf76d71e27e456546277a42ba6545c6e/fastapi-0.115.13-py3-none-any.whl", hash = "sha256:0a0cab59afa7bab22f5eb347f8c9864b681558c278395e94035a741fc10cd865", size = 95315, upload-time = "2025-06-17T11:49:44.106Z" }, + { url = "https://files.pythonhosted.org/packages/e5/47/d63c60f59a59467fda0f93f46335c9d18526d7071f025cb5b89d5353ea42/fastapi-0.116.1-py3-none-any.whl", hash = "sha256:c46ac7c312df840f0c9e220f7964bada936781bc4e2e6eb71f1c4d7553786565", size = 95631, upload-time = "2025-07-11T16:22:30.485Z" }, ] [package.optional-dependencies] @@ -986,22 +1359,41 @@ standard = [ [[package]] name = "fastapi-cli" -version = "0.0.7" +version = "0.0.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich-toolkit" }, { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fe/73/82a5831fbbf8ed75905bacf5b2d9d3dfd6f04d6968b29fe6f72a5ae9ceb1/fastapi_cli-0.0.7.tar.gz", hash = "sha256:02b3b65956f526412515907a0793c9094abd4bfb5457b389f645b0ea6ba3605e", size = 16753, upload-time = "2024-12-15T14:28:10.028Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c6/94/3ef75d9c7c32936ecb539b9750ccbdc3d2568efd73b1cb913278375f4533/fastapi_cli-0.0.8.tar.gz", hash = "sha256:2360f2989b1ab4a3d7fc8b3a0b20e8288680d8af2e31de7c38309934d7f8a0ee", size = 16884, upload-time = "2025-07-07T14:44:09.326Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/e6/5daefc851b514ce2287d8f5d358ae4341089185f78f3217a69d0ce3a390c/fastapi_cli-0.0.7-py3-none-any.whl", hash = 
"sha256:d549368ff584b2804336c61f192d86ddea080c11255f375959627911944804f4", size = 10705, upload-time = "2024-12-15T14:28:06.18Z" }, + { url = "https://files.pythonhosted.org/packages/e0/3f/6ad3103c5f59208baf4c798526daea6a74085bb35d1c161c501863470476/fastapi_cli-0.0.8-py3-none-any.whl", hash = "sha256:0ea95d882c85b9219a75a65ab27e8da17dac02873e456850fa0a726e96e985eb", size = 10770, upload-time = "2025-07-07T14:44:08.255Z" }, ] [package.optional-dependencies] standard = [ + { name = "fastapi-cloud-cli" }, + { name = "uvicorn", extra = ["standard"] }, +] + +[[package]] +name = "fastapi-cloud-cli" +version = "0.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic", extra = ["email"] }, + { name = "rich-toolkit" }, + { name = "rignore" }, + { name = "sentry-sdk" }, + { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] +sdist = { url = "https://files.pythonhosted.org/packages/a9/2e/3b6e5016affc310e5109bc580f760586eabecea0c8a7ab067611cd849ac0/fastapi_cloud_cli-0.1.5.tar.gz", hash = "sha256:341ee585eb731a6d3c3656cb91ad38e5f39809bf1a16d41de1333e38635a7937", size = 22710, upload-time = "2025-07-28T13:30:48.216Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/a6/5aa862489a2918a096166fd98d9fe86b7fd53c607678b3fa9d8c432d88d5/fastapi_cloud_cli-0.1.5-py3-none-any.whl", hash = "sha256:d80525fb9c0e8af122370891f9fa83cf5d496e4ad47a8dd26c0496a6c85a012a", size = 18992, upload-time = "2025-07-28T13:30:47.427Z" }, +] [[package]] name = "fastrlock" @@ -1022,45 +1414,40 @@ wheels = [ ] [[package]] -name = "fiddle" -version = "0.3.0" +name = "ffmpy" +version = "0.6.2" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py" }, - { name = "graphviz" }, - { name = "libcst" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/36/7a4fac76351619b36bbc7937abf59f7b601326dc4efc253b3c16819f782a/fiddle-0.3.0.tar.gz", hash = 
"sha256:5d083d3299a479868345513385a6c5546141bd92086c15d3dcbf8008a90075d3", size = 277884, upload-time = "2024-04-09T17:23:58.974Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b1/11/d1cf103779efaf9fd165b4ef5f7b8f44c4e692bb44638082d3980f83afc8/ffmpy-0.6.2.tar.gz", hash = "sha256:24fdb5d6476e7d73e6d5aaa95a6c34f66a214fb0f905d459609b3ba452dd940b", size = 4958, upload-time = "2025-10-08T06:34:50.063Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/98/a38e949a91ff9e15874487fd8329ff53c25f3413c0cfc809eb6ff7eb7fa1/fiddle-0.3.0-py3-none-any.whl", hash = "sha256:f4824541c103a94a2f33f6c93eeddf6007c3a7300440087a95907f3e74362e61", size = 419830, upload-time = "2024-04-09T17:23:56.7Z" }, + { url = "https://files.pythonhosted.org/packages/8b/57/699c8b55080248027b97a910818de7ce32e87e2dd60de5e81cff3a353745/ffmpy-0.6.2-py3-none-any.whl", hash = "sha256:ce6b8582e236a272f39f341dc37e7ddf313cb061eca52f9acf1b60062fe8e2fe", size = 5495, upload-time = "2025-10-08T06:34:48.665Z" }, ] [[package]] name = "filelock" -version = "3.18.0" +version = "3.19.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } +sdist = { url = "https://files.pythonhosted.org/packages/40/bb/0ab3e58d22305b6f5440629d20683af28959bf793d98d11950e305c1c326/filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58", size = 17687, upload-time = "2025-08-14T16:56:03.016Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = 
"2025-03-14T07:11:39.145Z" }, + { url = "https://files.pythonhosted.org/packages/42/14/42b2651a2f46b022ccd948bca9f2d5af0fd8929c4eec235b8d6d844fbe67/filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d", size = 15988, upload-time = "2025-08-14T16:56:01.633Z" }, ] [[package]] name = "flash-attn" -version = "2.7.4.post1" +version = "2.8.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "einops" }, { name = "ninja" }, { name = "psutil" }, { name = "setuptools" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/11/34/9bf60e736ed7bbe15055ac2dab48ec67d9dbd088d2b4ae318fd77190ab4e/flash_attn-2.7.4.post1.tar.gz", hash = "sha256:f03485c9a49a4d68d0733acdcad80ab0e72afa025a777fdc2966ceccf9d51765", size = 5986610, upload-time = "2025-01-30T06:39:51.93Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/6d/7066d160bdffa2f9da29a8c3957f266b17a03ca0b3bdc8fdae86d9881fe7/flash_attn-2.8.1.tar.gz", hash = "sha256:0ff003899fcb244f357905b04f622d5c9736887126dd6675f8f4bc52954e3923", size = 8166563, upload-time = "2025-07-10T05:16:39.729Z" } [[package]] name = "flask" -version = "3.1.1" +version = "3.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "blinker" }, @@ -1070,9 +1457,22 @@ dependencies = [ { name = "markupsafe" }, { name = "werkzeug" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/de/e47735752347f4128bcf354e0da07ef311a78244eba9e3dc1d4a5ab21a98/flask-3.1.1.tar.gz", hash = "sha256:284c7b8f2f58cb737f0cf1c30fd7eaf0ccfcde196099d24ecede3fc2005aa59e", size = 753440, upload-time = "2025-05-13T15:01:17.447Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/dc/6d/cfe3c0fcc5e477df242b98bfe186a4c34357b4847e87ecaef04507332dab/flask-3.1.2.tar.gz", hash = "sha256:bf656c15c80190ed628ad08cdfd3aaa35beb087855e2f494910aa3774cc4fd87", size = 720160, upload-time = "2025-08-19T21:03:21.205Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/f9/7f9263c5695f4bd0023734af91bedb2ff8209e8de6ead162f35d8dc762fd/flask-3.1.2-py3-none-any.whl", hash = "sha256:ca1d8112ec8a6158cc29ea4858963350011b5c846a414cdb7a954aa9e967d03c", size = 103308, upload-time = "2025-08-19T21:03:19.499Z" }, +] + +[[package]] +name = "flask-cors" +version = "6.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "flask" }, + { name = "werkzeug" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/37/bcfa6c7d5eec777c4c7cf45ce6b27631cebe5230caf88d85eadd63edd37a/flask_cors-6.0.1.tar.gz", hash = "sha256:d81bcb31f07b0985be7f48406247e9243aced229b7747219160a0559edd678db", size = 13463, upload-time = "2025-06-11T01:32:08.518Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305, upload-time = "2025-05-13T15:01:15.591Z" }, + { url = "https://files.pythonhosted.org/packages/17/f8/01bf35a3afd734345528f98d0353f2a978a476528ad4d7e78b70c4d149dd/flask_cors-6.0.1-py3-none-any.whl", hash = "sha256:c7b2cbfb1a31aa0d2e5341eea03a6805349f7a61647daee1a15c46bbe981494c", size = 13244, upload-time = "2025-06-11T01:32:07.352Z" }, ] [[package]] @@ -1092,27 +1492,54 @@ wheels = [ [[package]] name = "fonttools" -version = "4.58.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2e/5a/1124b2c8cb3a8015faf552e92714040bcdbc145dfa29928891b02d147a18/fonttools-4.58.4.tar.gz", hash = 
"sha256:928a8009b9884ed3aae17724b960987575155ca23c6f0b8146e400cc9e0d44ba", size = 3525026, upload-time = "2025-06-13T17:25:15.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/3c/1d1792bfe91ef46f22a3d23b4deb514c325e73c17d4f196b385b5e2faf1c/fonttools-4.58.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:462211c0f37a278494e74267a994f6be9a2023d0557aaa9ecbcbfce0f403b5a6", size = 2754082, upload-time = "2025-06-13T17:24:24.862Z" }, - { url = "https://files.pythonhosted.org/packages/2a/1f/2b261689c901a1c3bc57a6690b0b9fc21a9a93a8b0c83aae911d3149f34e/fonttools-4.58.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0c7a12fb6f769165547f00fcaa8d0df9517603ae7e04b625e5acb8639809b82d", size = 2321677, upload-time = "2025-06-13T17:24:26.815Z" }, - { url = "https://files.pythonhosted.org/packages/fe/6b/4607add1755a1e6581ae1fc0c9a640648e0d9cdd6591cc2d581c2e07b8c3/fonttools-4.58.4-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2d42c63020a922154add0a326388a60a55504629edc3274bc273cd3806b4659f", size = 4896354, upload-time = "2025-06-13T17:24:28.428Z" }, - { url = "https://files.pythonhosted.org/packages/cd/95/34b4f483643d0cb11a1f830b72c03fdd18dbd3792d77a2eb2e130a96fada/fonttools-4.58.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f2b4e6fd45edc6805f5f2c355590b092ffc7e10a945bd6a569fc66c1d2ae7aa", size = 4941633, upload-time = "2025-06-13T17:24:30.568Z" }, - { url = "https://files.pythonhosted.org/packages/81/ac/9bafbdb7694059c960de523e643fa5a61dd2f698f3f72c0ca18ae99257c7/fonttools-4.58.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f155b927f6efb1213a79334e4cb9904d1e18973376ffc17a0d7cd43d31981f1e", size = 4886170, upload-time = "2025-06-13T17:24:32.724Z" }, - { url = 
"https://files.pythonhosted.org/packages/ae/44/a3a3b70d5709405f7525bb7cb497b4e46151e0c02e3c8a0e40e5e9fe030b/fonttools-4.58.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e38f687d5de97c7fb7da3e58169fb5ba349e464e141f83c3c2e2beb91d317816", size = 5037851, upload-time = "2025-06-13T17:24:35.034Z" }, - { url = "https://files.pythonhosted.org/packages/21/cb/e8923d197c78969454eb876a4a55a07b59c9c4c46598f02b02411dc3b45c/fonttools-4.58.4-cp312-cp312-win32.whl", hash = "sha256:636c073b4da9db053aa683db99580cac0f7c213a953b678f69acbca3443c12cc", size = 2187428, upload-time = "2025-06-13T17:24:36.996Z" }, - { url = "https://files.pythonhosted.org/packages/46/e6/fe50183b1a0e1018e7487ee740fa8bb127b9f5075a41e20d017201e8ab14/fonttools-4.58.4-cp312-cp312-win_amd64.whl", hash = "sha256:82e8470535743409b30913ba2822e20077acf9ea70acec40b10fcf5671dceb58", size = 2236649, upload-time = "2025-06-13T17:24:38.985Z" }, - { url = "https://files.pythonhosted.org/packages/d4/4f/c05cab5fc1a4293e6bc535c6cb272607155a0517700f5418a4165b7f9ec8/fonttools-4.58.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5f4a64846495c543796fa59b90b7a7a9dff6839bd852741ab35a71994d685c6d", size = 2745197, upload-time = "2025-06-13T17:24:40.645Z" }, - { url = "https://files.pythonhosted.org/packages/3e/d3/49211b1f96ae49308f4f78ca7664742377a6867f00f704cdb31b57e4b432/fonttools-4.58.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e80661793a5d4d7ad132a2aa1eae2e160fbdbb50831a0edf37c7c63b2ed36574", size = 2317272, upload-time = "2025-06-13T17:24:43.428Z" }, - { url = "https://files.pythonhosted.org/packages/b2/11/c9972e46a6abd752a40a46960e431c795ad1f306775fc1f9e8c3081a1274/fonttools-4.58.4-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fe5807fc64e4ba5130f1974c045a6e8d795f3b7fb6debfa511d1773290dbb76b", size = 4877184, upload-time = "2025-06-13T17:24:45.527Z" }, - { url = 
"https://files.pythonhosted.org/packages/ea/24/5017c01c9ef8df572cc9eaf9f12be83ad8ed722ff6dc67991d3d752956e4/fonttools-4.58.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b610b9bef841cb8f4b50472494158b1e347d15cad56eac414c722eda695a6cfd", size = 4939445, upload-time = "2025-06-13T17:24:47.647Z" }, - { url = "https://files.pythonhosted.org/packages/79/b0/538cc4d0284b5a8826b4abed93a69db52e358525d4b55c47c8cef3669767/fonttools-4.58.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2daa7f0e213c38f05f054eb5e1730bd0424aebddbeac094489ea1585807dd187", size = 4878800, upload-time = "2025-06-13T17:24:49.766Z" }, - { url = "https://files.pythonhosted.org/packages/5a/9b/a891446b7a8250e65bffceb248508587958a94db467ffd33972723ab86c9/fonttools-4.58.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:66cccb6c0b944496b7f26450e9a66e997739c513ffaac728d24930df2fd9d35b", size = 5021259, upload-time = "2025-06-13T17:24:51.754Z" }, - { url = "https://files.pythonhosted.org/packages/17/b2/c4d2872cff3ace3ddd1388bf15b76a1d8d5313f0a61f234e9aed287e674d/fonttools-4.58.4-cp313-cp313-win32.whl", hash = "sha256:94d2aebb5ca59a5107825520fde596e344652c1f18170ef01dacbe48fa60c889", size = 2185824, upload-time = "2025-06-13T17:24:54.324Z" }, - { url = "https://files.pythonhosted.org/packages/98/57/cddf8bcc911d4f47dfca1956c1e3aeeb9f7c9b8e88b2a312fe8c22714e0b/fonttools-4.58.4-cp313-cp313-win_amd64.whl", hash = "sha256:b554bd6e80bba582fd326ddab296e563c20c64dca816d5e30489760e0c41529f", size = 2236382, upload-time = "2025-06-13T17:24:56.291Z" }, - { url = "https://files.pythonhosted.org/packages/0b/2f/c536b5b9bb3c071e91d536a4d11f969e911dbb6b227939f4c5b0bca090df/fonttools-4.58.4-py3-none-any.whl", hash = "sha256:a10ce13a13f26cbb9f37512a4346bb437ad7e002ff6fa966a7ce7ff5ac3528bd", size = 1114660, upload-time = "2025-06-13T17:25:13.321Z" }, +version = "4.59.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/11/7f/29c9c3fe4246f6ad96fee52b88d0dc3a863c7563b0afc959e36d78b965dc/fonttools-4.59.1.tar.gz", hash = "sha256:74995b402ad09822a4c8002438e54940d9f1ecda898d2bb057729d7da983e4cb", size = 3534394, upload-time = "2025-08-14T16:28:14.266Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/fe/6e069cc4cb8881d164a9bd956e9df555bc62d3eb36f6282e43440200009c/fonttools-4.59.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:43ab814bbba5f02a93a152ee61a04182bb5809bd2bc3609f7822e12c53ae2c91", size = 2769172, upload-time = "2025-08-14T16:26:45.729Z" }, + { url = "https://files.pythonhosted.org/packages/b9/98/ec4e03f748fefa0dd72d9d95235aff6fef16601267f4a2340f0e16b9330f/fonttools-4.59.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4f04c3ffbfa0baafcbc550657cf83657034eb63304d27b05cff1653b448ccff6", size = 2337281, upload-time = "2025-08-14T16:26:47.921Z" }, + { url = "https://files.pythonhosted.org/packages/8b/b1/890360a7e3d04a30ba50b267aca2783f4c1364363797e892e78a4f036076/fonttools-4.59.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d601b153e51a5a6221f0d4ec077b6bfc6ac35bfe6c19aeaa233d8990b2b71726", size = 4909215, upload-time = "2025-08-14T16:26:49.682Z" }, + { url = "https://files.pythonhosted.org/packages/8a/ec/2490599550d6c9c97a44c1e36ef4de52d6acf742359eaa385735e30c05c4/fonttools-4.59.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c735e385e30278c54f43a0d056736942023c9043f84ee1021eff9fd616d17693", size = 4951958, upload-time = "2025-08-14T16:26:51.616Z" }, + { url = "https://files.pythonhosted.org/packages/d1/40/bd053f6f7634234a9b9805ff8ae4f32df4f2168bee23cafd1271ba9915a9/fonttools-4.59.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1017413cdc8555dce7ee23720da490282ab7ec1cf022af90a241f33f9a49afc4", size = 4894738, upload-time = "2025-08-14T16:26:53.836Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/a1/3cd12a010d288325a7cfcf298a84825f0f9c29b01dee1baba64edfe89257/fonttools-4.59.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5c6d8d773470a5107052874341ed3c487c16ecd179976d81afed89dea5cd7406", size = 5045983, upload-time = "2025-08-14T16:26:56.153Z" }, + { url = "https://files.pythonhosted.org/packages/a2/af/8a2c3f6619cc43cf87951405337cc8460d08a4e717bb05eaa94b335d11dc/fonttools-4.59.1-cp312-cp312-win32.whl", hash = "sha256:2a2d0d33307f6ad3a2086a95dd607c202ea8852fa9fb52af9b48811154d1428a", size = 2203407, upload-time = "2025-08-14T16:26:58.165Z" }, + { url = "https://files.pythonhosted.org/packages/8e/f2/a19b874ddbd3ebcf11d7e25188ef9ac3f68b9219c62263acb34aca8cde05/fonttools-4.59.1-cp312-cp312-win_amd64.whl", hash = "sha256:0b9e4fa7eaf046ed6ac470f6033d52c052481ff7a6e0a92373d14f556f298dc0", size = 2251561, upload-time = "2025-08-14T16:27:00.646Z" }, + { url = "https://files.pythonhosted.org/packages/19/5e/94a4d7f36c36e82f6a81e0064d148542e0ad3e6cf51fc5461ca128f3658d/fonttools-4.59.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:89d9957b54246c6251345297dddf77a84d2c19df96af30d2de24093bbdf0528b", size = 2760192, upload-time = "2025-08-14T16:27:03.024Z" }, + { url = "https://files.pythonhosted.org/packages/ee/a5/f50712fc33ef9d06953c660cefaf8c8fe4b8bc74fa21f44ee5e4f9739439/fonttools-4.59.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8156b11c0d5405810d216f53907bd0f8b982aa5f1e7e3127ab3be1a4062154ff", size = 2332694, upload-time = "2025-08-14T16:27:04.883Z" }, + { url = "https://files.pythonhosted.org/packages/e9/a2/5a9fc21c354bf8613215ce233ab0d933bd17d5ff4c29693636551adbc7b3/fonttools-4.59.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8387876a8011caec52d327d5e5bca705d9399ec4b17afb8b431ec50d47c17d23", size = 4889254, upload-time = "2025-08-14T16:27:07.02Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/e5/54a6dc811eba018d022ca2e8bd6f2969291f9586ccf9a22a05fc55f91250/fonttools-4.59.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb13823a74b3a9204a8ed76d3d6d5ec12e64cc5bc44914eb9ff1cdac04facd43", size = 4949109, upload-time = "2025-08-14T16:27:09.3Z" }, + { url = "https://files.pythonhosted.org/packages/db/15/b05c72a248a95bea0fd05fbd95acdf0742945942143fcf961343b7a3663a/fonttools-4.59.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e1ca10da138c300f768bb68e40e5b20b6ecfbd95f91aac4cc15010b6b9d65455", size = 4888428, upload-time = "2025-08-14T16:27:11.514Z" }, + { url = "https://files.pythonhosted.org/packages/63/71/c7d6840f858d695adc0c4371ec45e3fb1c8e060b276ba944e2800495aca4/fonttools-4.59.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2beb5bfc4887a3130f8625349605a3a45fe345655ce6031d1bac11017454b943", size = 5032668, upload-time = "2025-08-14T16:27:13.872Z" }, + { url = "https://files.pythonhosted.org/packages/90/54/57be4aca6f1312e2bc4d811200dd822325794e05bdb26eeff0976edca651/fonttools-4.59.1-cp313-cp313-win32.whl", hash = "sha256:419f16d750d78e6d704bfe97b48bba2f73b15c9418f817d0cb8a9ca87a5b94bf", size = 2201832, upload-time = "2025-08-14T16:27:16.126Z" }, + { url = "https://files.pythonhosted.org/packages/fc/1f/1899a6175a5f900ed8730a0d64f53ca1b596ed7609bfda033cf659114258/fonttools-4.59.1-cp313-cp313-win_amd64.whl", hash = "sha256:c536f8a852e8d3fa71dde1ec03892aee50be59f7154b533f0bf3c1174cfd5126", size = 2250673, upload-time = "2025-08-14T16:27:18.033Z" }, + { url = "https://files.pythonhosted.org/packages/15/07/f6ba82c22f118d9985c37fea65d8d715ca71300d78b6c6e90874dc59f11d/fonttools-4.59.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:d5c3bfdc9663f3d4b565f9cb3b8c1efb3e178186435b45105bde7328cfddd7fe", size = 2758606, upload-time = "2025-08-14T16:27:20.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/3a/81/84aa3d0ce27b0112c28b67b637ff7a47cf401cf5fbfee6476e4bc9777580/fonttools-4.59.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ea03f1da0d722fe3c2278a05957e6550175571a4894fbf9d178ceef4a3783d2b", size = 2330187, upload-time = "2025-08-14T16:27:22.42Z" }, + { url = "https://files.pythonhosted.org/packages/17/41/b3ba43f78afb321e2e50232c87304c8d0f5ab39b64389b8286cc39cdb824/fonttools-4.59.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:57a3708ca6bfccb790f585fa6d8f29432ec329618a09ff94c16bcb3c55994643", size = 4832020, upload-time = "2025-08-14T16:27:24.214Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/3af871c7fb325a68938e7ce544ca48bfd2c6bb7b357f3c8252933b29100a/fonttools-4.59.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:729367c91eb1ee84e61a733acc485065a00590618ca31c438e7dd4d600c01486", size = 4930687, upload-time = "2025-08-14T16:27:26.484Z" }, + { url = "https://files.pythonhosted.org/packages/c5/4f/299fc44646b30d9ef03ffaa78b109c7bd32121f0d8f10009ee73ac4514bc/fonttools-4.59.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8f8ef66ac6db450193ed150e10b3b45dde7aded10c5d279968bc63368027f62b", size = 4875794, upload-time = "2025-08-14T16:27:28.887Z" }, + { url = "https://files.pythonhosted.org/packages/90/cf/a0a3d763ab58f5f81ceff104ddb662fd9da94248694862b9c6cbd509fdd5/fonttools-4.59.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:075f745d539a998cd92cb84c339a82e53e49114ec62aaea8307c80d3ad3aef3a", size = 4985780, upload-time = "2025-08-14T16:27:30.858Z" }, + { url = "https://files.pythonhosted.org/packages/72/c5/ba76511aaae143d89c29cd32ce30bafb61c477e8759a1590b8483f8065f8/fonttools-4.59.1-cp314-cp314-win32.whl", hash = "sha256:c2b0597522d4c5bb18aa5cf258746a2d4a90f25878cbe865e4d35526abd1b9fc", size = 2205610, upload-time = "2025-08-14T16:27:32.578Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/65/b250e69d6caf35bc65cddbf608be0662d741c248f2e7503ab01081fc267e/fonttools-4.59.1-cp314-cp314-win_amd64.whl", hash = "sha256:e9ad4ce044e3236f0814c906ccce8647046cc557539661e35211faadf76f283b", size = 2255376, upload-time = "2025-08-14T16:27:34.653Z" }, + { url = "https://files.pythonhosted.org/packages/11/f3/0bc63a23ac0f8175e23d82f85d6ee693fbd849de7ad739f0a3622182ad29/fonttools-4.59.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:652159e8214eb4856e8387ebcd6b6bd336ee258cbeb639c8be52005b122b9609", size = 2826546, upload-time = "2025-08-14T16:27:36.783Z" }, + { url = "https://files.pythonhosted.org/packages/e9/46/a3968205590e068fdf60e926be329a207782576cb584d3b7dcd2d2844957/fonttools-4.59.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:43d177cd0e847ea026fedd9f099dc917da136ed8792d142298a252836390c478", size = 2359771, upload-time = "2025-08-14T16:27:39.678Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ff/d14b4c283879e8cb57862d9624a34fe6522b6fcdd46ccbfc58900958794a/fonttools-4.59.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e54437651e1440ee53a95e6ceb6ee440b67a3d348c76f45f4f48de1a5ecab019", size = 4831575, upload-time = "2025-08-14T16:27:41.885Z" }, + { url = "https://files.pythonhosted.org/packages/9c/04/a277d9a584a49d98ca12d3b2c6663bdf333ae97aaa83bd0cdabf7c5a6c84/fonttools-4.59.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6065fdec8ff44c32a483fd44abe5bcdb40dd5e2571a5034b555348f2b3a52cea", size = 5069962, upload-time = "2025-08-14T16:27:44.284Z" }, + { url = "https://files.pythonhosted.org/packages/16/6f/3d2ae69d96c4cdee6dfe7598ca5519a1514487700ca3d7c49c5a1ad65308/fonttools-4.59.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42052b56d176f8b315fbc09259439c013c0cb2109df72447148aeda677599612", size = 4942926, upload-time = "2025-08-14T16:27:46.523Z" }, + { url 
= "https://files.pythonhosted.org/packages/0c/d3/c17379e0048d03ce26b38e4ab0e9a98280395b00529e093fe2d663ac0658/fonttools-4.59.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:bcd52eaa5c4c593ae9f447c1d13e7e4a00ca21d755645efa660b6999425b3c88", size = 4958678, upload-time = "2025-08-14T16:27:48.555Z" }, + { url = "https://files.pythonhosted.org/packages/8c/3f/c5543a1540abdfb4d375e3ebeb84de365ab9b153ec14cb7db05f537dd1e7/fonttools-4.59.1-cp314-cp314t-win32.whl", hash = "sha256:02e4fdf27c550dded10fe038a5981c29f81cb9bc649ff2eaa48e80dab8998f97", size = 2266706, upload-time = "2025-08-14T16:27:50.556Z" }, + { url = "https://files.pythonhosted.org/packages/3e/99/85bff6e674226bc8402f983e365f07e76d990e7220ba72bcc738fef52391/fonttools-4.59.1-cp314-cp314t-win_amd64.whl", hash = "sha256:412a5fd6345872a7c249dac5bcce380393f40c1c316ac07f447bc17d51900922", size = 2329994, upload-time = "2025-08-14T16:27:52.36Z" }, + { url = "https://files.pythonhosted.org/packages/0f/64/9d606e66d498917cd7a2ff24f558010d42d6fd4576d9dd57f0bd98333f5a/fonttools-4.59.1-py3-none-any.whl", hash = "sha256:647db657073672a8330608970a984d51573557f328030566521bc03415535042", size = 1130094, upload-time = "2025-08-14T16:28:12.048Z" }, +] + +[[package]] +name = "frozendict" +version = "2.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/59/19eb300ba28e7547538bdf603f1c6c34793240a90e1a7b61b65d8517e35e/frozendict-2.4.6.tar.gz", hash = "sha256:df7cd16470fbd26fc4969a208efadc46319334eb97def1ddf48919b351192b8e", size = 316416, upload-time = "2024-10-13T12:15:32.449Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/13/d9839089b900fa7b479cce495d62110cddc4bd5630a04d8469916c0e79c5/frozendict-2.4.6-py311-none-any.whl", hash = "sha256:d065db6a44db2e2375c23eac816f1a022feb2fa98cbb50df44a9e83700accbea", size = 16148, upload-time = "2024-10-13T12:15:26.839Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/d0/d482c39cee2ab2978a892558cf130681d4574ea208e162da8958b31e9250/frozendict-2.4.6-py312-none-any.whl", hash = "sha256:49344abe90fb75f0f9fdefe6d4ef6d4894e640fadab71f11009d52ad97f370b9", size = 16146, upload-time = "2024-10-13T12:15:28.16Z" }, + { url = "https://files.pythonhosted.org/packages/a5/8e/b6bf6a0de482d7d7d7a2aaac8fdc4a4d0bb24a809f5ddd422aa7060eb3d2/frozendict-2.4.6-py313-none-any.whl", hash = "sha256:7134a2bb95d4a16556bb5f2b9736dceb6ea848fa5b6f3f6c2d6dba93b44b4757", size = 16146, upload-time = "2024-10-13T12:15:29.495Z" }, ] [[package]] @@ -1217,14 +1644,14 @@ wheels = [ [[package]] name = "gitpython" -version = "3.1.44" +version = "3.1.45" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "gitdb" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c0/89/37df0b71473153574a5cdef8f242de422a0f5d26d7a9e231e6f169b4ad14/gitpython-3.1.44.tar.gz", hash = "sha256:c87e30b26253bf5418b01b0660f818967f3c503193838337fe5e573331249269", size = 214196, upload-time = "2025-01-02T07:32:43.59Z" } +sdist = { url = "https://files.pythonhosted.org/packages/9a/c8/dd58967d119baab745caec2f9d853297cec1989ec1d63f677d3880632b88/gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c", size = 215076, upload-time = "2025-07-24T03:45:54.871Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/9a/4114a9057db2f1462d5c8f8390ab7383925fe1ac012eaa42402ad65c2963/GitPython-3.1.44-py3-none-any.whl", hash = "sha256:9e0e10cda9bed1ee64bc9a6de50e7e38a9c9943241cd7f585f6df3ed28011110", size = 207599, upload-time = "2025-01-02T07:32:40.731Z" }, + { url = "https://files.pythonhosted.org/packages/01/61/d4b89fec821f72385526e1b9d9a3a0385dda4a72b206d28049e2c7cd39b8/gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77", size = 208168, upload-time = "2025-07-24T03:45:52.517Z" }, ] [[package]] @@ -1269,6 
+1696,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, ] +[[package]] +name = "gradio" +version = "5.49.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiofiles" }, + { name = "anyio" }, + { name = "audioop-lts", marker = "python_full_version >= '3.13'" }, + { name = "brotli" }, + { name = "fastapi" }, + { name = "ffmpy" }, + { name = "gradio-client" }, + { name = "groovy" }, + { name = "httpx" }, + { name = "huggingface-hub" }, + { name = "jinja2" }, + { name = "markupsafe" }, + { name = "numpy" }, + { name = "orjson" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "pydub" }, + { name = "python-multipart" }, + { name = "pyyaml" }, + { name = "ruff" }, + { name = "safehttpx" }, + { name = "semantic-version" }, + { name = "starlette" }, + { name = "tomlkit" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/67/17b3969a686f204dfb8f06bd34d1423bcba1df8a2f3674f115ca427188b7/gradio-5.49.1.tar.gz", hash = "sha256:c06faa324ae06c3892c8b4b4e73c706c4520d380f6b9e52a3c02dc53a7627ba9", size = 73784504, upload-time = "2025-10-08T20:18:40.4Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/95/1c25fbcabfa201ab79b016c8716a4ac0f846121d4bbfd2136ffb6d87f31e/gradio-5.49.1-py3-none-any.whl", hash = "sha256:1b19369387801a26a6ba7fd2f74d46c5b0e2ac9ddef14f24ddc0d11fb19421b7", size = 63523840, upload-time = "2025-10-08T20:18:34.585Z" }, +] + +[[package]] +name = "gradio-client" +version = "1.13.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "fsspec" }, + { name = "httpx" }, + 
{ name = "huggingface-hub" }, + { name = "packaging" }, + { name = "typing-extensions" }, + { name = "websockets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3e/a9/a3beb0ece8c05c33e6376b790fa42e0dd157abca8220cf639b249a597467/gradio_client-1.13.3.tar.gz", hash = "sha256:869b3e67e0f7a0f40df8c48c94de99183265cf4b7b1d9bd4623e336d219ffbe7", size = 323253, upload-time = "2025-09-26T19:51:21.7Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6e/0b/337b74504681b5dde39f20d803bb09757f9973ecdc65fd4e819d4b11faf7/gradio_client-1.13.3-py3-none-any.whl", hash = "sha256:3f63e4d33a2899c1a12b10fe3cf77b82a6919ff1a1fb6391f6aa225811aa390c", size = 325350, upload-time = "2025-09-26T19:51:20.288Z" }, +] + [[package]] name = "graphene" version = "3.4.3" @@ -1306,73 +1790,127 @@ wheels = [ ] [[package]] -name = "graphviz" -version = "0.21" +name = "greenlet" +version = "3.2.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/b8/704d753a5a45507a7aab61f18db9509302ed3d0a27ac7e0359ec2905b1a6/greenlet-3.2.4.tar.gz", hash = "sha256:0dca0d95ff849f9a364385f36ab49f50065d76964944638be9691e1832e9f86d", size = 188260, upload-time = "2025-08-07T13:24:33.51Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, + { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, + { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, + { url = "https://files.pythonhosted.org/packages/3f/c7/12381b18e21aef2c6bd3a636da1088b888b97b7a0362fac2e4de92405f97/greenlet-3.2.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:20fb936b4652b6e307b8f347665e2c615540d4b42b3b4c8a321d8286da7e520f", size = 1151142, upload-time = "2025-08-07T13:18:22.981Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/45/80935968b53cfd3f33cf99ea5f08227f2646e044568c9b1555b58ffd61c2/greenlet-3.2.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ee7a6ec486883397d70eec05059353b8e83eca9168b9f3f9a361971e77e0bcd0", size = 1564846, upload-time = "2025-11-04T12:42:15.191Z" }, + { url = "https://files.pythonhosted.org/packages/69/02/b7c30e5e04752cb4db6202a3858b149c0710e5453b71a3b2aec5d78a1aab/greenlet-3.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:326d234cbf337c9c3def0676412eb7040a35a768efc92504b947b3e9cfc7543d", size = 1633814, upload-time = "2025-11-04T12:42:17.175Z" }, + { url = "https://files.pythonhosted.org/packages/e9/08/b0814846b79399e585f974bbeebf5580fbe59e258ea7be64d9dfb253c84f/greenlet-3.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:a7d4e128405eea3814a12cc2605e0e6aedb4035bf32697f72deca74de4105e02", size = 299899, upload-time = "2025-08-07T13:38:53.448Z" }, + { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, + { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, + { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, + { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, + { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, + { url = "https://files.pythonhosted.org/packages/a2/15/0d5e4e1a66fab130d98168fe984c509249c833c1a3c16806b90f253ce7b9/greenlet-3.2.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:d25c5091190f2dc0eaa3f950252122edbbadbb682aa7b1ef2f8af0f8c0afefae", size = 1149210, upload-time = "2025-08-07T13:18:24.072Z" }, + { url = "https://files.pythonhosted.org/packages/1c/53/f9c440463b3057485b8594d7a638bed53ba531165ef0ca0e6c364b5cc807/greenlet-3.2.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e343822feb58ac4d0a1211bd9399de2b3a04963ddeec21530fc426cc121f19b", size = 1564759, upload-time = "2025-11-04T12:42:19.395Z" }, + { url = 
"https://files.pythonhosted.org/packages/47/e4/3bb4240abdd0a8d23f4f88adec746a3099f0d86bfedb623f063b2e3b4df0/greenlet-3.2.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ca7f6f1f2649b89ce02f6f229d7c19f680a6238af656f61e0115b24857917929", size = 1634288, upload-time = "2025-11-04T12:42:21.174Z" }, + { url = "https://files.pythonhosted.org/packages/0b/55/2321e43595e6801e105fcfdee02b34c0f996eb71e6ddffca6b10b7e1d771/greenlet-3.2.4-cp313-cp313-win_amd64.whl", hash = "sha256:554b03b6e73aaabec3745364d6239e9e012d64c68ccd0b8430c64ccc14939a8b", size = 299685, upload-time = "2025-08-07T13:24:38.824Z" }, + { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, + { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, + { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, + { url = 
"https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, + { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, + { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, + { url = "https://files.pythonhosted.org/packages/0d/da/343cd760ab2f92bac1845ca07ee3faea9fe52bee65f7bcb19f16ad7de08b/greenlet-3.2.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:015d48959d4add5d6c9f6c5210ee3803a830dce46356e3bc326d6776bde54681", size = 1680760, upload-time = "2025-11-04T12:42:25.341Z" }, + { url = "https://files.pythonhosted.org/packages/e3/a5/6ddab2b4c112be95601c13428db1d8b6608a8b6039816f2ba09c346c08fc/greenlet-3.2.4-cp314-cp314-win_amd64.whl", hash = "sha256:e37ab26028f12dbb0ff65f29a8d3d44a765c61e729647bf2ddfbbed621726f01", size = 303425, upload-time = "2025-08-07T13:32:27.59Z" }, +] + +[[package]] +name = "grimp" +version = "3.9" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } -wheels = [ - { url 
= "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, +dependencies = [ + { name = "joblib" }, + { name = "typing-extensions" }, ] - -[[package]] -name = "greenlet" -version = "3.2.3" +sdist = { url = "https://files.pythonhosted.org/packages/f5/a4/b5109e7457e647e859c3f68cab22c55139f30dbc5549f62b0f216a00e3f1/grimp-3.9.tar.gz", hash = "sha256:b677ac17301d7e0f1e19cc7057731bd7956a2121181eb5057e51efb44301fb0a", size = 840675, upload-time = "2025-05-05T13:46:49.069Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/dd/6b528f821d98d240f4654d7ad947be078e27e55f6d1128207b313213cdde/grimp-3.9-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:c19a27aa7541b620df94ceafde89d6ebf9ee1b263e80d278ea45bdd504fec769", size = 1783791, upload-time = "2025-05-05T13:45:40.592Z" }, + { url = "https://files.pythonhosted.org/packages/74/a6/646828c8afe6b30b4270b43f1a550f7d3a2334867a002bf3f6b035a37255/grimp-3.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f68e7a771c9eb4459106decd6cc4f11313202b10d943a1a8bed463b528889dd0", size = 1710400, upload-time = "2025-05-05T13:45:32.833Z" }, + { url = "https://files.pythonhosted.org/packages/99/62/b12ed166268e73d676b72accde5493ff6a7781b284f7830a596af2b7fb98/grimp-3.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8290eb4561dc29c590fc099f2bdac4827a9b86a018e146428854f9742ab480ef", size = 1858308, upload-time = "2025-05-05T13:44:13.816Z" }, + { url = "https://files.pythonhosted.org/packages/f0/6a/da220f9fdb4ceed9bd03f624b00c493e7357387257b695a0624be6d6cf11/grimp-3.9-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4574c0d135e6af8cddc31ac9617c00aac3181bb4d476f5aea173a5f2ac8c7479", size = 1823353, upload-time = "2025-05-05T13:44:28.538Z" }, + { url = 
"https://files.pythonhosted.org/packages/f0/93/1eb6615f9c12a4eb752ea29e3880c5313ad3d7c771150f544e53e10fa807/grimp-3.9-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5e4110bd0aedd7da899e44ec0d4a93529e93f2d03e5786e3469a5f7562e11e9", size = 1948889, upload-time = "2025-05-05T13:45:12.57Z" }, + { url = "https://files.pythonhosted.org/packages/86/7e/e5d3a2ee933e2c83b412a89efc4f939dbf5bf5098c78717e6a432401b206/grimp-3.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d098f6e10c0e42c6be0eca2726a7d7218e90ba020141fa3f88426a5f7d09d71", size = 2025587, upload-time = "2025-05-05T13:44:42.212Z" }, + { url = "https://files.pythonhosted.org/packages/fa/59/ead04d7658b977ffafcc3b382c54bc0231f03b5298343db9d4cc547edcde/grimp-3.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69573ecc5cc84bb175e5aa5af2fe09dfb2f33a399c59c025f5f3d7d2f6f202fe", size = 2119002, upload-time = "2025-05-05T13:44:57.901Z" }, + { url = "https://files.pythonhosted.org/packages/0e/80/790e40d77703f846082d6a7f2f37ceec481e9ebe2763551d591083c84e4d/grimp-3.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63e4bdb4382fb0afd52216e70a0e4da3f0500de8f9e40ee8d2b68a16a35c40c4", size = 1922590, upload-time = "2025-05-05T13:45:22.985Z" }, + { url = "https://files.pythonhosted.org/packages/eb/31/c490b387298540ef5fe1960df13879cab7a56b37af0f6b4a7d351e131c15/grimp-3.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1ddde011e9bb2fa1abb816373bd8898d1a486cf4f4b13dc46a11ddcd57406e1b", size = 2032993, upload-time = "2025-05-05T13:45:48.831Z" }, + { url = "https://files.pythonhosted.org/packages/aa/46/f02ebadff9ddddbf9f930b78bf3011d038380c059a4b3e0395ed38894c42/grimp-3.9-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fa32eed6fb383ec4e54b4073e8ce75a5b151bb1f1d11be66be18aee04d3c9c4b", size = 2087494, upload-time = "2025-05-05T13:46:04.07Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/10/93c4d705126c3978b247a28f90510489f3f3cb477cbcf8a2a851cd18a0ae/grimp-3.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e9cc09977f8688839e0c9873fd214e11c971f5df38bffb31d402d04803dfff92", size = 2069454, upload-time = "2025-05-05T13:46:20.056Z" }, + { url = "https://files.pythonhosted.org/packages/eb/ae/2afb75600941f6e09cfb91762704e85a420678f5de6b97e1e2a34ad53e60/grimp-3.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3a732b461db86403aa3c8154ffab85d1964c8c6adaa763803ce260abbc504b6f", size = 2092176, upload-time = "2025-05-05T13:46:35.619Z" }, + { url = "https://files.pythonhosted.org/packages/51/de/c5b12fd191e39c9888a57be8d5a62892ee25fa5e61017d2b5835fbf28076/grimp-3.9-cp312-cp312-win32.whl", hash = "sha256:829d60b4c1c8c6bfb1c7348cf3e30b87f462a7d9316ced9d8265146a2153a0cd", size = 1494790, upload-time = "2025-05-05T13:47:01.642Z" }, + { url = "https://files.pythonhosted.org/packages/ef/31/3faf755b0cde71f1d3e7f6069d873586f9293930fadd3fca5f21c4ee35b8/grimp-3.9-cp312-cp312-win_amd64.whl", hash = "sha256:556ab4fbf943299fd90e467d481803b8e1a57d28c24af5867012559f51435ceb", size = 1598355, upload-time = "2025-05-05T13:46:53.461Z" }, + { url = "https://files.pythonhosted.org/packages/47/51/469735ff46942adace8b5723d4d64e81c8c14ab429c49b75d0421cfde9ca/grimp-3.9-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:867b476677b1d2f89b6c9ca0d7c47b279fe9d0230087f621c6aba94331411690", size = 1783474, upload-time = "2025-05-05T13:45:42.151Z" }, + { url = "https://files.pythonhosted.org/packages/11/8c/5647fb256216f7f7fd960a29ece28a736f859a80cc36793e103602b81828/grimp-3.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:faf5dd2cc7012a6024e743976674d55e66c6e556eaffd30e5843a88cc4623c16", size = 1709699, upload-time = "2025-05-05T13:45:34.622Z" }, + { url = 
"https://files.pythonhosted.org/packages/26/40/b02a8226c80aa8130e583ae62e12563476d74b909944e80092fe73ba7f9b/grimp-3.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ff6c0de2e9cffed8f7ec1a9c80888f01017806cfb9acf9c3d8fc3137a629d51", size = 1857628, upload-time = "2025-05-05T13:44:15.268Z" }, + { url = "https://files.pythonhosted.org/packages/b8/a0/936147329ceb0398c848fdb80a96d32805afccdd382772a9cd553c91b5ed/grimp-3.9-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e38f92a650756f9b00198991cb60c5e3add9d68475425fb4fe0960d1586660ce", size = 1822818, upload-time = "2025-05-05T13:44:29.895Z" }, + { url = "https://files.pythonhosted.org/packages/d5/44/afdd11a6ece8f801a0af8653adb6bfaa64d2652da564e9f53137392f4e8c/grimp-3.9-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e1ef77c7841b15d9f5002c767da1060ec42cb477fa7ae33d7f9dffb4705dc0", size = 1948678, upload-time = "2025-05-05T13:45:14.026Z" }, + { url = "https://files.pythonhosted.org/packages/4f/44/2b9ba423068f88a3ea177e0c5633afb0154f677885647dd5b98737fa56f6/grimp-3.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:19a9bb0b05d1b0738920c604cdc544c9073df6edd71f31963054576647c8f897", size = 2025146, upload-time = "2025-05-05T13:44:44.044Z" }, + { url = "https://files.pythonhosted.org/packages/9b/7a/97fc0ecd9e91fe5bd18a01de7dc70c11fc8b06954ee83d82df306f14f644/grimp-3.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9f9d5e6182859900610f15704847897115707b28ca2c9b5c754ef3bef9adb485", size = 2118665, upload-time = "2025-05-05T13:44:59.385Z" }, + { url = "https://files.pythonhosted.org/packages/37/c4/fa75d6ffc4b87d9d920ec912b24f6af61aff8b26b0ebb0d8f5d8b2a66cc4/grimp-3.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b4e63efe9c2df2e8efe98142fa754ef9140e3aa3ce942ef55f52bb7a177a0822", size = 1921756, upload-time = "2025-05-05T13:45:24.356Z" }, + { url = 
"https://files.pythonhosted.org/packages/c6/43/af4590755aab31ffa1227a6560f34bfa575d1dc606dff6d3dc15b7200ced/grimp-3.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e204b17675763a7091fd5e8b7c58c83c8383505d90b6aea6a5e0d5bb737cb856", size = 2032640, upload-time = "2025-05-05T13:45:50.304Z" }, + { url = "https://files.pythonhosted.org/packages/06/d3/d627d9678f6074cc6bb614cfaa5208f352e32523cd26c61a282d6c07aadf/grimp-3.9-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:15d23a90d34d3f94e5437c7bc29ad1b82d059ed9b039c84d6ef20d83b826ca88", size = 2086606, upload-time = "2025-05-05T13:46:06.064Z" }, + { url = "https://files.pythonhosted.org/packages/9e/ae/8ffa1377d45bca60a25d2120258b5d9738eb23c25eb8bb702dcffbe222d3/grimp-3.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:04ed7f682ac07aee6e8cd99c1ea3d0ba26ea8167b71b4b79f05640982c1b1fa3", size = 2069295, upload-time = "2025-05-05T13:46:21.513Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5a/f42bd065775927d47e7281f49bc85ccc639e97fba5842e6f348da8249acc/grimp-3.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75f33e7b98652ce17fc9a5d0dce0bc5f4ba68fd73a15f10dd4cd1ea511bab0c1", size = 2091251, upload-time = "2025-05-05T13:46:37.529Z" }, + { url = "https://files.pythonhosted.org/packages/4b/87/d35867fe1791450fe802d0dc6e04bfc7601c289357910455912c8c0e7a4b/grimp-3.9-cp313-cp313-win32.whl", hash = "sha256:72921d8727a508b34393a330748db91fca62fa506b86f5a4c457f713a6468c15", size = 1494320, upload-time = "2025-05-05T13:47:03.099Z" }, + { url = "https://files.pythonhosted.org/packages/95/c9/b25441ecb3b8a317d5cf5aee708a76adc7eb11e09ac2b7abf41a8e53effa/grimp-3.9-cp313-cp313-win_amd64.whl", hash = "sha256:cd65bc6d030d9d788a1794e01cdc3b4abce2971cc821e2e7dc02d09c45febc56", size = 1597627, upload-time = "2025-05-05T13:46:55.321Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/e0/a906b3f8136b761b955e4a8b4576b648c53ae096d3af50ee3a69849df202/grimp-3.9-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:057d4f7e4b9f62909406701d5bab773b39e1fd8591043c6b19dba3ab3b275625", size = 1855680, upload-time = "2025-05-05T13:44:16.812Z" }, + { url = "https://files.pythonhosted.org/packages/14/ee/a9aa98f692feddee20463d2572d1ae7b7e274a2e66be9d8159e0c926fd8e/grimp-3.9-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0c660f1222b7c11d725d298bce09b85376b0084d5515b8364a7a70c0547a0992", size = 1822232, upload-time = "2025-05-05T13:44:31.726Z" }, + { url = "https://files.pythonhosted.org/packages/6b/00/78c1cb3a2792d00ef3ecf5e2b4df92dc8faac92c71755c05ba160b1beddf/grimp-3.9-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78662f2c0ae4e7ff3eacff051e6b3110ed026135545a1825a53a858d4e966ebb", size = 2022814, upload-time = "2025-05-05T13:44:45.458Z" }, + { url = "https://files.pythonhosted.org/packages/fd/4f/2fde4f9b3cde995af35bef9b7496d8e76f661ac2b747caa69d5d62cc34a2/grimp-3.9-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b57b20f51ce7765adaffd80b3a17a365b770a5d237a772a2a8a74cc19c186f2", size = 2118021, upload-time = "2025-05-05T13:45:00.758Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e0/9a7a56bc8b2789cae9d4fa32a809e060ddeb681dec84d8344a48f9b10298/grimp-3.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:335511ad698e2a7d6e15dccdb843afc6ad4bde79f213479c799f67c98ce36002", size = 2031477, upload-time = "2025-05-05T13:45:51.908Z" }, + { url = "https://files.pythonhosted.org/packages/89/fc/63bb580ccbd015a37ff3f0841f17957f14e3cfee096b94837e2f43f7c422/grimp-3.9-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:574c94895d4fcac2e5ae794636fe687fb80b9ca59fe3bb8458d7a64bc3b3ed9e", size = 2086058, upload-time = "2025-05-05T13:46:07.948Z" }, + { url = 
"https://files.pythonhosted.org/packages/02/ad/8a90b922b52525279c3eb22d578b6b2580fafffed9e48ff788cceb34ef62/grimp-3.9-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:84c95f9df61ddaffd8f41a4181aa652f3fdf9932b26634cd8273d4dcd926321e", size = 2068266, upload-time = "2025-05-05T13:46:22.971Z" }, + { url = "https://files.pythonhosted.org/packages/34/b2/056fd4642637cd4627d59ccf2be3f62dd41b8da98e49300eeecd8d4faaa5/grimp-3.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9ddcbfd11d6e6b813121db1116f6b3c4930ab433a949522b5e80542c5da3d805", size = 2092059, upload-time = "2025-05-05T13:46:41.095Z" }, +] + +[[package]] +name = "groovy" +version = "0.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c9/92/bb85bd6e80148a4d2e0c59f7c0c2891029f8fd510183afc7d8d2feeed9b6/greenlet-3.2.3.tar.gz", hash = "sha256:8b0dd8ae4c0d6f5e54ee55ba935eeb3d735a9b58a8a1e5b5cbab64e01a39f365", size = 185752, upload-time = "2025-06-05T16:16:09.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/94/ad0d435f7c48debe960c53b8f60fb41c2026b1d0fa4a99a1cb17c3461e09/greenlet-3.2.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:25ad29caed5783d4bd7a85c9251c651696164622494c00802a139c00d639242d", size = 271992, upload-time = "2025-06-05T16:11:23.467Z" }, - { url = "https://files.pythonhosted.org/packages/93/5d/7c27cf4d003d6e77749d299c7c8f5fd50b4f251647b5c2e97e1f20da0ab5/greenlet-3.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:88cd97bf37fe24a6710ec6a3a7799f3f81d9cd33317dcf565ff9950c83f55e0b", size = 638820, upload-time = "2025-06-05T16:38:52.882Z" }, - { url = "https://files.pythonhosted.org/packages/c6/7e/807e1e9be07a125bb4c169144937910bf59b9d2f6d931578e57f0bce0ae2/greenlet-3.2.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:baeedccca94880d2f5666b4fa16fc20ef50ba1ee353ee2d7092b383a243b0b0d", size = 653046, upload-time = "2025-06-05T16:41:36.343Z" 
}, - { url = "https://files.pythonhosted.org/packages/9d/ab/158c1a4ea1068bdbc78dba5a3de57e4c7aeb4e7fa034320ea94c688bfb61/greenlet-3.2.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:be52af4b6292baecfa0f397f3edb3c6092ce071b499dd6fe292c9ac9f2c8f264", size = 647701, upload-time = "2025-06-05T16:48:19.604Z" }, - { url = "https://files.pythonhosted.org/packages/cc/0d/93729068259b550d6a0288da4ff72b86ed05626eaf1eb7c0d3466a2571de/greenlet-3.2.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0cc73378150b8b78b0c9fe2ce56e166695e67478550769536a6742dca3651688", size = 649747, upload-time = "2025-06-05T16:13:04.628Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f6/c82ac1851c60851302d8581680573245c8fc300253fc1ff741ae74a6c24d/greenlet-3.2.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:706d016a03e78df129f68c4c9b4c4f963f7d73534e48a24f5f5a7101ed13dbbb", size = 605461, upload-time = "2025-06-05T16:12:50.792Z" }, - { url = "https://files.pythonhosted.org/packages/98/82/d022cf25ca39cf1200650fc58c52af32c90f80479c25d1cbf57980ec3065/greenlet-3.2.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:419e60f80709510c343c57b4bb5a339d8767bf9aef9b8ce43f4f143240f88b7c", size = 1121190, upload-time = "2025-06-05T16:36:48.59Z" }, - { url = "https://files.pythonhosted.org/packages/f5/e1/25297f70717abe8104c20ecf7af0a5b82d2f5a980eb1ac79f65654799f9f/greenlet-3.2.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:93d48533fade144203816783373f27a97e4193177ebaaf0fc396db19e5d61163", size = 1149055, upload-time = "2025-06-05T16:12:40.457Z" }, - { url = "https://files.pythonhosted.org/packages/1f/8f/8f9e56c5e82eb2c26e8cde787962e66494312dc8cb261c460e1f3a9c88bc/greenlet-3.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:7454d37c740bb27bdeddfc3f358f26956a07d5220818ceb467a483197d84f849", size = 297817, upload-time = "2025-06-05T16:29:49.244Z" }, - { url = 
"https://files.pythonhosted.org/packages/b1/cf/f5c0b23309070ae93de75c90d29300751a5aacefc0a3ed1b1d8edb28f08b/greenlet-3.2.3-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:500b8689aa9dd1ab26872a34084503aeddefcb438e2e7317b89b11eaea1901ad", size = 270732, upload-time = "2025-06-05T16:10:08.26Z" }, - { url = "https://files.pythonhosted.org/packages/48/ae/91a957ba60482d3fecf9be49bc3948f341d706b52ddb9d83a70d42abd498/greenlet-3.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a07d3472c2a93117af3b0136f246b2833fdc0b542d4a9799ae5f41c28323faef", size = 639033, upload-time = "2025-06-05T16:38:53.983Z" }, - { url = "https://files.pythonhosted.org/packages/6f/df/20ffa66dd5a7a7beffa6451bdb7400d66251374ab40b99981478c69a67a8/greenlet-3.2.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:8704b3768d2f51150626962f4b9a9e4a17d2e37c8a8d9867bbd9fa4eb938d3b3", size = 652999, upload-time = "2025-06-05T16:41:37.89Z" }, - { url = "https://files.pythonhosted.org/packages/51/b4/ebb2c8cb41e521f1d72bf0465f2f9a2fd803f674a88db228887e6847077e/greenlet-3.2.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5035d77a27b7c62db6cf41cf786cfe2242644a7a337a0e155c80960598baab95", size = 647368, upload-time = "2025-06-05T16:48:21.467Z" }, - { url = "https://files.pythonhosted.org/packages/8e/6a/1e1b5aa10dced4ae876a322155705257748108b7fd2e4fae3f2a091fe81a/greenlet-3.2.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2d8aa5423cd4a396792f6d4580f88bdc6efcb9205891c9d40d20f6e670992efb", size = 650037, upload-time = "2025-06-05T16:13:06.402Z" }, - { url = "https://files.pythonhosted.org/packages/26/f2/ad51331a157c7015c675702e2d5230c243695c788f8f75feba1af32b3617/greenlet-3.2.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2c724620a101f8170065d7dded3f962a2aea7a7dae133a009cada42847e04a7b", size = 608402, upload-time = "2025-06-05T16:12:51.91Z" }, - { url = 
"https://files.pythonhosted.org/packages/26/bc/862bd2083e6b3aff23300900a956f4ea9a4059de337f5c8734346b9b34fc/greenlet-3.2.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:873abe55f134c48e1f2a6f53f7d1419192a3d1a4e873bace00499a4e45ea6af0", size = 1119577, upload-time = "2025-06-05T16:36:49.787Z" }, - { url = "https://files.pythonhosted.org/packages/86/94/1fc0cc068cfde885170e01de40a619b00eaa8f2916bf3541744730ffb4c3/greenlet-3.2.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:024571bbce5f2c1cfff08bf3fbaa43bbc7444f580ae13b0099e95d0e6e67ed36", size = 1147121, upload-time = "2025-06-05T16:12:42.527Z" }, - { url = "https://files.pythonhosted.org/packages/27/1a/199f9587e8cb08a0658f9c30f3799244307614148ffe8b1e3aa22f324dea/greenlet-3.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5195fb1e75e592dd04ce79881c8a22becdfa3e6f500e7feb059b1e6fdd54d3e3", size = 297603, upload-time = "2025-06-05T16:20:12.651Z" }, - { url = "https://files.pythonhosted.org/packages/d8/ca/accd7aa5280eb92b70ed9e8f7fd79dc50a2c21d8c73b9a0856f5b564e222/greenlet-3.2.3-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:3d04332dddb10b4a211b68111dabaee2e1a073663d117dc10247b5b1642bac86", size = 271479, upload-time = "2025-06-05T16:10:47.525Z" }, - { url = "https://files.pythonhosted.org/packages/55/71/01ed9895d9eb49223280ecc98a557585edfa56b3d0e965b9fa9f7f06b6d9/greenlet-3.2.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8186162dffde068a465deab08fc72c767196895c39db26ab1c17c0b77a6d8b97", size = 683952, upload-time = "2025-06-05T16:38:55.125Z" }, - { url = "https://files.pythonhosted.org/packages/ea/61/638c4bdf460c3c678a0a1ef4c200f347dff80719597e53b5edb2fb27ab54/greenlet-3.2.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f4bfbaa6096b1b7a200024784217defedf46a07c2eee1a498e94a1b5f8ec5728", size = 696917, upload-time = "2025-06-05T16:41:38.959Z" }, - { url = 
"https://files.pythonhosted.org/packages/22/cc/0bd1a7eb759d1f3e3cc2d1bc0f0b487ad3cc9f34d74da4b80f226fde4ec3/greenlet-3.2.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:ed6cfa9200484d234d8394c70f5492f144b20d4533f69262d530a1a082f6ee9a", size = 692443, upload-time = "2025-06-05T16:48:23.113Z" }, - { url = "https://files.pythonhosted.org/packages/67/10/b2a4b63d3f08362662e89c103f7fe28894a51ae0bc890fabf37d1d780e52/greenlet-3.2.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:02b0df6f63cd15012bed5401b47829cfd2e97052dc89da3cfaf2c779124eb892", size = 692995, upload-time = "2025-06-05T16:13:07.972Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c6/ad82f148a4e3ce9564056453a71529732baf5448ad53fc323e37efe34f66/greenlet-3.2.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86c2d68e87107c1792e2e8d5399acec2487a4e993ab76c792408e59394d52141", size = 655320, upload-time = "2025-06-05T16:12:53.453Z" }, - { url = "https://files.pythonhosted.org/packages/5c/4f/aab73ecaa6b3086a4c89863d94cf26fa84cbff63f52ce9bc4342b3087a06/greenlet-3.2.3-cp314-cp314-win_amd64.whl", hash = "sha256:8c47aae8fbbfcf82cc13327ae802ba13c9c36753b67e760023fd116bc124a62a", size = 301236, upload-time = "2025-06-05T16:15:20.111Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/52/36/bbdede67400277bef33d3ec0e6a31750da972c469f75966b4930c753218f/groovy-0.1.2.tar.gz", hash = "sha256:25c1dc09b3f9d7e292458aa762c6beb96ea037071bf5e917fc81fb78d2231083", size = 17325, upload-time = "2025-02-28T20:24:56.068Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/27/3d6dcadc8a3214d8522c1e7f6a19554e33659be44546d44a2f7572ac7d2a/groovy-0.1.2-py3-none-any.whl", hash = "sha256:7f7975bab18c729a257a8b1ae9dcd70b7cafb1720481beae47719af57c35fa64", size = 14090, upload-time = "2025-02-28T20:24:55.152Z" }, ] [[package]] name = "grpcio" -version = "1.73.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url 
= "https://files.pythonhosted.org/packages/8e/7b/ca3f561aeecf0c846d15e1b38921a60dffffd5d4113931198fbf455334ee/grpcio-1.73.0.tar.gz", hash = "sha256:3af4c30918a7f0d39de500d11255f8d9da4f30e94a2033e70fe2a720e184bd8e", size = 12786424, upload-time = "2025-06-09T10:08:23.365Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9d/4d/e938f3a0e51a47f2ce7e55f12f19f316e7074770d56a7c2765e782ec76bc/grpcio-1.73.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:fb9d7c27089d9ba3746f18d2109eb530ef2a37452d2ff50f5a6696cd39167d3b", size = 5334911, upload-time = "2025-06-09T10:03:33.494Z" }, - { url = "https://files.pythonhosted.org/packages/13/56/f09c72c43aa8d6f15a71f2c63ebdfac9cf9314363dea2598dc501d8370db/grpcio-1.73.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:128ba2ebdac41e41554d492b82c34586a90ebd0766f8ebd72160c0e3a57b9155", size = 10601460, upload-time = "2025-06-09T10:03:36.613Z" }, - { url = "https://files.pythonhosted.org/packages/20/e3/85496edc81e41b3c44ebefffc7bce133bb531120066877df0f910eabfa19/grpcio-1.73.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:068ecc415f79408d57a7f146f54cdf9f0acb4b301a52a9e563973dc981e82f3d", size = 5759191, upload-time = "2025-06-09T10:03:39.838Z" }, - { url = "https://files.pythonhosted.org/packages/88/cc/fef74270a6d29f35ad744bfd8e6c05183f35074ff34c655a2c80f3b422b2/grpcio-1.73.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ddc1cfb2240f84d35d559ade18f69dcd4257dbaa5ba0de1a565d903aaab2968", size = 6409961, upload-time = "2025-06-09T10:03:42.706Z" }, - { url = "https://files.pythonhosted.org/packages/b0/e6/13cfea15e3b8f79c4ae7b676cb21fab70978b0fde1e1d28bb0e073291290/grpcio-1.73.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e53007f70d9783f53b41b4cf38ed39a8e348011437e4c287eee7dd1d39d54b2f", size = 6003948, upload-time = "2025-06-09T10:03:44.96Z" }, - { url = 
"https://files.pythonhosted.org/packages/c2/ed/b1a36dad4cc0dbf1f83f6d7b58825fefd5cc9ff3a5036e46091335649473/grpcio-1.73.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4dd8d8d092efede7d6f48d695ba2592046acd04ccf421436dd7ed52677a9ad29", size = 6103788, upload-time = "2025-06-09T10:03:48.053Z" }, - { url = "https://files.pythonhosted.org/packages/e7/c8/d381433d3d46d10f6858126d2d2245ef329e30f3752ce4514c93b95ca6fc/grpcio-1.73.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:70176093d0a95b44d24baa9c034bb67bfe2b6b5f7ebc2836f4093c97010e17fd", size = 6749508, upload-time = "2025-06-09T10:03:51.185Z" }, - { url = "https://files.pythonhosted.org/packages/87/0a/ff0c31dbd15e63b34320efafac647270aa88c31aa19ff01154a73dc7ce86/grpcio-1.73.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:085ebe876373ca095e24ced95c8f440495ed0b574c491f7f4f714ff794bbcd10", size = 6284342, upload-time = "2025-06-09T10:03:54.467Z" }, - { url = "https://files.pythonhosted.org/packages/fd/73/f762430c0ba867403b9d6e463afe026bf019bd9206eee753785239719273/grpcio-1.73.0-cp312-cp312-win32.whl", hash = "sha256:cfc556c1d6aef02c727ec7d0016827a73bfe67193e47c546f7cadd3ee6bf1a60", size = 3669319, upload-time = "2025-06-09T10:03:56.751Z" }, - { url = "https://files.pythonhosted.org/packages/10/8b/3411609376b2830449cf416f457ad9d2aacb7f562e1b90fdd8bdedf26d63/grpcio-1.73.0-cp312-cp312-win_amd64.whl", hash = "sha256:bbf45d59d090bf69f1e4e1594832aaf40aa84b31659af3c5e2c3f6a35202791a", size = 4335596, upload-time = "2025-06-09T10:03:59.866Z" }, - { url = "https://files.pythonhosted.org/packages/60/da/6f3f7a78e5455c4cbe87c85063cc6da05d65d25264f9d4aed800ece46294/grpcio-1.73.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:da1d677018ef423202aca6d73a8d3b2cb245699eb7f50eb5f74cae15a8e1f724", size = 5335867, upload-time = "2025-06-09T10:04:03.153Z" }, - { url = 
"https://files.pythonhosted.org/packages/53/14/7d1f2526b98b9658d7be0bb163fd78d681587de6709d8b0c74b4b481b013/grpcio-1.73.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:36bf93f6a657f37c131d9dd2c391b867abf1426a86727c3575393e9e11dadb0d", size = 10595587, upload-time = "2025-06-09T10:04:05.694Z" }, - { url = "https://files.pythonhosted.org/packages/02/24/a293c398ae44e741da1ed4b29638edbb002258797b07a783f65506165b4c/grpcio-1.73.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:d84000367508ade791d90c2bafbd905574b5ced8056397027a77a215d601ba15", size = 5765793, upload-time = "2025-06-09T10:04:09.235Z" }, - { url = "https://files.pythonhosted.org/packages/e1/24/d84dbd0b5bf36fb44922798d525a85cefa2ffee7b7110e61406e9750ed15/grpcio-1.73.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c98ba1d928a178ce33f3425ff823318040a2b7ef875d30a0073565e5ceb058d9", size = 6415494, upload-time = "2025-06-09T10:04:12.377Z" }, - { url = "https://files.pythonhosted.org/packages/5e/85/c80dc65aed8e9dce3d54688864bac45331d9c7600985541f18bd5cb301d4/grpcio-1.73.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a73c72922dfd30b396a5f25bb3a4590195ee45ecde7ee068acb0892d2900cf07", size = 6007279, upload-time = "2025-06-09T10:04:14.878Z" }, - { url = "https://files.pythonhosted.org/packages/37/fc/207c00a4c6fa303d26e2cbd62fbdb0582facdfd08f55500fd83bf6b0f8db/grpcio-1.73.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:10e8edc035724aba0346a432060fd192b42bd03675d083c01553cab071a28da5", size = 6105505, upload-time = "2025-06-09T10:04:17.39Z" }, - { url = "https://files.pythonhosted.org/packages/72/35/8fe69af820667b87ebfcb24214e42a1d53da53cb39edd6b4f84f6b36da86/grpcio-1.73.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f5cdc332b503c33b1643b12ea933582c7b081957c8bc2ea4cc4bc58054a09288", size = 6753792, upload-time = "2025-06-09T10:04:19.989Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/d8/738c77c1e821e350da4a048849f695ff88a02b291f8c69db23908867aea6/grpcio-1.73.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:07ad7c57233c2109e4ac999cb9c2710c3b8e3f491a73b058b0ce431f31ed8145", size = 6287593, upload-time = "2025-06-09T10:04:22.878Z" }, - { url = "https://files.pythonhosted.org/packages/09/ec/8498eabc018fa39ae8efe5e47e3f4c1bc9ed6281056713871895dc998807/grpcio-1.73.0-cp313-cp313-win32.whl", hash = "sha256:0eb5df4f41ea10bda99a802b2a292d85be28958ede2a50f2beb8c7fc9a738419", size = 3668637, upload-time = "2025-06-09T10:04:25.787Z" }, - { url = "https://files.pythonhosted.org/packages/d7/35/347db7d2e7674b621afd21b12022e7f48c7b0861b5577134b4e939536141/grpcio-1.73.0-cp313-cp313-win_amd64.whl", hash = "sha256:38cf518cc54cd0c47c9539cefa8888549fcc067db0b0c66a46535ca8032020c4", size = 4335872, upload-time = "2025-06-09T10:04:29.032Z" }, +version = "1.74.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/38/b4/35feb8f7cab7239c5b94bd2db71abb3d6adb5f335ad8f131abb6060840b6/grpcio-1.74.0.tar.gz", hash = "sha256:80d1f4fbb35b0742d3e3d3bb654b7381cd5f015f8497279a1e9c21ba623e01b1", size = 12756048, upload-time = "2025-07-24T18:54:23.039Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/5d/e504d5d5c4469823504f65687d6c8fb97b7f7bf0b34873b7598f1df24630/grpcio-1.74.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:8533e6e9c5bd630ca98062e3a1326249e6ada07d05acf191a77bc33f8948f3d8", size = 5445551, upload-time = "2025-07-24T18:53:23.641Z" }, + { url = "https://files.pythonhosted.org/packages/43/01/730e37056f96f2f6ce9f17999af1556df62ee8dab7fa48bceeaab5fd3008/grpcio-1.74.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:2918948864fec2a11721d91568effffbe0a02b23ecd57f281391d986847982f6", size = 10979810, upload-time = "2025-07-24T18:53:25.349Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/3d/09fd100473ea5c47083889ca47ffd356576173ec134312f6aa0e13111dee/grpcio-1.74.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:60d2d48b0580e70d2e1954d0d19fa3c2e60dd7cbed826aca104fff518310d1c5", size = 5941946, upload-time = "2025-07-24T18:53:27.387Z" }, + { url = "https://files.pythonhosted.org/packages/8a/99/12d2cca0a63c874c6d3d195629dcd85cdf5d6f98a30d8db44271f8a97b93/grpcio-1.74.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3601274bc0523f6dc07666c0e01682c94472402ac2fd1226fd96e079863bfa49", size = 6621763, upload-time = "2025-07-24T18:53:29.193Z" }, + { url = "https://files.pythonhosted.org/packages/9d/2c/930b0e7a2f1029bbc193443c7bc4dc2a46fedb0203c8793dcd97081f1520/grpcio-1.74.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:176d60a5168d7948539def20b2a3adcce67d72454d9ae05969a2e73f3a0feee7", size = 6180664, upload-time = "2025-07-24T18:53:30.823Z" }, + { url = "https://files.pythonhosted.org/packages/db/d5/ff8a2442180ad0867717e670f5ec42bfd8d38b92158ad6bcd864e6d4b1ed/grpcio-1.74.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e759f9e8bc908aaae0412642afe5416c9f983a80499448fcc7fab8692ae044c3", size = 6301083, upload-time = "2025-07-24T18:53:32.454Z" }, + { url = "https://files.pythonhosted.org/packages/b0/ba/b361d390451a37ca118e4ec7dccec690422e05bc85fba2ec72b06cefec9f/grpcio-1.74.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e7c4389771855a92934b2846bd807fc25a3dfa820fd912fe6bd8136026b2707", size = 6994132, upload-time = "2025-07-24T18:53:34.506Z" }, + { url = "https://files.pythonhosted.org/packages/3b/0c/3a5fa47d2437a44ced74141795ac0251bbddeae74bf81df3447edd767d27/grpcio-1.74.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cce634b10aeab37010449124814b05a62fb5f18928ca878f1bf4750d1f0c815b", size = 6489616, upload-time = "2025-07-24T18:53:36.217Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/95/ab64703b436d99dc5217228babc76047d60e9ad14df129e307b5fec81fd0/grpcio-1.74.0-cp312-cp312-win32.whl", hash = "sha256:885912559974df35d92219e2dc98f51a16a48395f37b92865ad45186f294096c", size = 3807083, upload-time = "2025-07-24T18:53:37.911Z" }, + { url = "https://files.pythonhosted.org/packages/84/59/900aa2445891fc47a33f7d2f76e00ca5d6ae6584b20d19af9c06fa09bf9a/grpcio-1.74.0-cp312-cp312-win_amd64.whl", hash = "sha256:42f8fee287427b94be63d916c90399ed310ed10aadbf9e2e5538b3e497d269bc", size = 4490123, upload-time = "2025-07-24T18:53:39.528Z" }, + { url = "https://files.pythonhosted.org/packages/d4/d8/1004a5f468715221450e66b051c839c2ce9a985aa3ee427422061fcbb6aa/grpcio-1.74.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:2bc2d7d8d184e2362b53905cb1708c84cb16354771c04b490485fa07ce3a1d89", size = 5449488, upload-time = "2025-07-24T18:53:41.174Z" }, + { url = "https://files.pythonhosted.org/packages/94/0e/33731a03f63740d7743dced423846c831d8e6da808fcd02821a4416df7fa/grpcio-1.74.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:c14e803037e572c177ba54a3e090d6eb12efd795d49327c5ee2b3bddb836bf01", size = 10974059, upload-time = "2025-07-24T18:53:43.066Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c6/3d2c14d87771a421205bdca991467cfe473ee4c6a1231c1ede5248c62ab8/grpcio-1.74.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:f6ec94f0e50eb8fa1744a731088b966427575e40c2944a980049798b127a687e", size = 5945647, upload-time = "2025-07-24T18:53:45.269Z" }, + { url = "https://files.pythonhosted.org/packages/c5/83/5a354c8aaff58594eef7fffebae41a0f8995a6258bbc6809b800c33d4c13/grpcio-1.74.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:566b9395b90cc3d0d0c6404bc8572c7c18786ede549cdb540ae27b58afe0fb91", size = 6626101, upload-time = "2025-07-24T18:53:47.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/ca/4fdc7bf59bf6994aa45cbd4ef1055cd65e2884de6113dbd49f75498ddb08/grpcio-1.74.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1ea6176d7dfd5b941ea01c2ec34de9531ba494d541fe2057c904e601879f249", size = 6182562, upload-time = "2025-07-24T18:53:48.967Z" }, + { url = "https://files.pythonhosted.org/packages/fd/48/2869e5b2c1922583686f7ae674937986807c2f676d08be70d0a541316270/grpcio-1.74.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:64229c1e9cea079420527fa8ac45d80fc1e8d3f94deaa35643c381fa8d98f362", size = 6303425, upload-time = "2025-07-24T18:53:50.847Z" }, + { url = "https://files.pythonhosted.org/packages/a6/0e/bac93147b9a164f759497bc6913e74af1cb632c733c7af62c0336782bd38/grpcio-1.74.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:0f87bddd6e27fc776aacf7ebfec367b6d49cad0455123951e4488ea99d9b9b8f", size = 6996533, upload-time = "2025-07-24T18:53:52.747Z" }, + { url = "https://files.pythonhosted.org/packages/84/35/9f6b2503c1fd86d068b46818bbd7329db26a87cdd8c01e0d1a9abea1104c/grpcio-1.74.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:3b03d8f2a07f0fea8c8f74deb59f8352b770e3900d143b3d1475effcb08eec20", size = 6491489, upload-time = "2025-07-24T18:53:55.06Z" }, + { url = "https://files.pythonhosted.org/packages/75/33/a04e99be2a82c4cbc4039eb3a76f6c3632932b9d5d295221389d10ac9ca7/grpcio-1.74.0-cp313-cp313-win32.whl", hash = "sha256:b6a73b2ba83e663b2480a90b82fdae6a7aa6427f62bf43b29912c0cfd1aa2bfa", size = 3805811, upload-time = "2025-07-24T18:53:56.798Z" }, + { url = "https://files.pythonhosted.org/packages/34/80/de3eb55eb581815342d097214bed4c59e806b05f1b3110df03b2280d6dfd/grpcio-1.74.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd3c71aeee838299c5887230b8a1822795325ddfea635edd82954c1eaa831e24", size = 4489214, upload-time = "2025-07-24T18:53:59.771Z" }, ] [[package]] @@ -1380,7 +1918,7 @@ name = "gunicorn" version = "23.0.0" source = { registry = "https://pypi.org/simple" } dependencies = 
[ - { name = "packaging" }, + { name = "packaging", marker = "sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } wheels = [ @@ -1396,27 +1934,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "h5py" -version = "3.14.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5d/57/dfb3c5c3f1bf5f5ef2e59a22dec4ff1f3d7408b55bfcefcfb0ea69ef21c6/h5py-3.14.0.tar.gz", hash = "sha256:2372116b2e0d5d3e5e705b7f663f7c8d96fa79a4052d250484ef91d24d6a08f4", size = 424323, upload-time = "2025-06-06T14:06:15.01Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/77/8f651053c1843391e38a189ccf50df7e261ef8cd8bfd8baba0cbe694f7c3/h5py-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e0045115d83272090b0717c555a31398c2c089b87d212ceba800d3dc5d952e23", size = 3312740, upload-time = "2025-06-06T14:05:01.193Z" }, - { url = "https://files.pythonhosted.org/packages/ff/10/20436a6cf419b31124e59fefc78d74cb061ccb22213226a583928a65d715/h5py-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6da62509b7e1d71a7d110478aa25d245dd32c8d9a1daee9d2a42dba8717b047a", size = 2829207, upload-time = "2025-06-06T14:05:05.061Z" }, - { url = "https://files.pythonhosted.org/packages/3f/19/c8bfe8543bfdd7ccfafd46d8cfd96fce53d6c33e9c7921f375530ee1d39a/h5py-3.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:554ef0ced3571366d4d383427c00c966c360e178b5fb5ee5bb31a435c424db0c", size = 4708455, upload-time = "2025-06-06T14:05:11.528Z" }, - { url = "https://files.pythonhosted.org/packages/86/f9/f00de11c82c88bfc1ef22633557bfba9e271e0cb3189ad704183fc4a2644/h5py-3.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cbd41f4e3761f150aa5b662df991868ca533872c95467216f2bec5fcad84882", size = 4929422, upload-time = "2025-06-06T14:05:18.399Z" }, - { url = "https://files.pythonhosted.org/packages/7a/6d/6426d5d456f593c94b96fa942a9b3988ce4d65ebaf57d7273e452a7222e8/h5py-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:bf4897d67e613ecf5bdfbdab39a1158a64df105827da70ea1d90243d796d367f", size = 2862845, upload-time = "2025-06-06T14:05:23.699Z" }, - { url = "https://files.pythonhosted.org/packages/6c/c2/7efe82d09ca10afd77cd7c286e42342d520c049a8c43650194928bcc635c/h5py-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:aa4b7bbce683379b7bf80aaba68e17e23396100336a8d500206520052be2f812", size = 3289245, upload-time = "2025-06-06T14:05:28.24Z" }, - { url = "https://files.pythonhosted.org/packages/4f/31/f570fab1239b0d9441024b92b6ad03bb414ffa69101a985e4c83d37608bd/h5py-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9603a501a04fcd0ba28dd8f0995303d26a77a980a1f9474b3417543d4c6174", size = 2807335, upload-time = "2025-06-06T14:05:31.997Z" }, - { url = "https://files.pythonhosted.org/packages/0d/ce/3a21d87896bc7e3e9255e0ad5583ae31ae9e6b4b00e0bcb2a67e2b6acdbc/h5py-3.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8cbaf6910fa3983c46172666b0b8da7b7bd90d764399ca983236f2400436eeb", size = 4700675, upload-time = "2025-06-06T14:05:37.38Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ec/86f59025306dcc6deee5fda54d980d077075b8d9889aac80f158bd585f1b/h5py-3.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d90e6445ab7c146d7f7981b11895d70bc1dd91278a4f9f9028bc0c95e4a53f13", size = 
4921632, upload-time = "2025-06-06T14:05:43.464Z" }, - { url = "https://files.pythonhosted.org/packages/3f/6d/0084ed0b78d4fd3e7530c32491f2884140d9b06365dac8a08de726421d4a/h5py-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae18e3de237a7a830adb76aaa68ad438d85fe6e19e0d99944a3ce46b772c69b3", size = 2852929, upload-time = "2025-06-06T14:05:47.659Z" }, -] - [[package]] name = "hatchling" version = "1.27.0" @@ -1434,17 +1951,17 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.1.5" +version = "1.1.8" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/d4/7685999e85945ed0d7f0762b686ae7015035390de1161dcea9d5276c134c/hf_xet-1.1.5.tar.gz", hash = "sha256:69ebbcfd9ec44fdc2af73441619eeb06b94ee34511bbcf57cd423820090f5694", size = 495969, upload-time = "2025-06-20T21:48:38.007Z" } +sdist = { url = "https://files.pythonhosted.org/packages/7a/49/91010b59debc7c862a5fd426d343134dd9a68778dbe570234b6495a4e204/hf_xet-1.1.8.tar.gz", hash = "sha256:62a0043e441753bbc446dcb5a3fe40a4d03f5fb9f13589ef1df9ab19252beb53", size = 484065, upload-time = "2025-08-18T22:01:03.584Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/89/a1119eebe2836cb25758e7661d6410d3eae982e2b5e974bcc4d250be9012/hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23", size = 2687929, upload-time = "2025-06-20T21:48:32.284Z" }, - { url = "https://files.pythonhosted.org/packages/de/5f/2c78e28f309396e71ec8e4e9304a6483dcbc36172b5cea8f291994163425/hf_xet-1.1.5-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:9fa6e3ee5d61912c4a113e0708eaaef987047616465ac7aa30f7121a48fc1af8", size = 2556338, upload-time = "2025-06-20T21:48:30.079Z" }, - { url = "https://files.pythonhosted.org/packages/6d/2f/6cad7b5fe86b7652579346cb7f85156c11761df26435651cbba89376cd2c/hf_xet-1.1.5-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:fc874b5c843e642f45fd85cda1ce599e123308ad2901ead23d3510a47ff506d1", size = 3102894, upload-time = "2025-06-20T21:48:28.114Z" }, - { url = "https://files.pythonhosted.org/packages/d0/54/0fcf2b619720a26fbb6cc941e89f2472a522cd963a776c089b189559447f/hf_xet-1.1.5-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dbba1660e5d810bd0ea77c511a99e9242d920790d0e63c0e4673ed36c4022d18", size = 3002134, upload-time = "2025-06-20T21:48:25.906Z" }, - { url = "https://files.pythonhosted.org/packages/f3/92/1d351ac6cef7c4ba8c85744d37ffbfac2d53d0a6c04d2cabeba614640a78/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ab34c4c3104133c495785d5d8bba3b1efc99de52c02e759cf711a91fd39d3a14", size = 3171009, upload-time = "2025-06-20T21:48:33.987Z" }, - { url = "https://files.pythonhosted.org/packages/c9/65/4b2ddb0e3e983f2508528eb4501288ae2f84963586fbdfae596836d5e57a/hf_xet-1.1.5-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:83088ecea236d5113de478acb2339f92c95b4fb0462acaa30621fac02f5a534a", size = 3279245, upload-time = "2025-06-20T21:48:36.051Z" }, - { url = "https://files.pythonhosted.org/packages/f0/55/ef77a85ee443ae05a9e9cba1c9f0dd9241eb42da2aeba1dc50f51154c81a/hf_xet-1.1.5-cp37-abi3-win_amd64.whl", hash = "sha256:73e167d9807d166596b4b2f0b585c6d5bd84a26dea32843665a8b58f6edba245", size = 2738931, upload-time = "2025-06-20T21:48:39.482Z" }, + { url = "https://files.pythonhosted.org/packages/9c/91/5814db3a0d4a65fb6a87f0931ae28073b87f06307701fe66e7c41513bfb4/hf_xet-1.1.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3d5f82e533fc51c7daad0f9b655d9c7811b5308e5890236828bd1dd3ed8fea74", size = 2752357, upload-time = "2025-08-18T22:00:58.777Z" }, + { url = "https://files.pythonhosted.org/packages/70/72/ce898516e97341a7a9d450609e130e108643389110261eaee6deb1ba8545/hf_xet-1.1.8-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:8e2dba5896bca3ab61d0bef4f01a1647004de59640701b37e37eaa57087bbd9d", size = 2613142, upload-time = "2025-08-18T22:00:57.252Z" }, + { url = 
"https://files.pythonhosted.org/packages/b7/d6/13af5f916cef795ac2b5e4cc1de31f2e0e375f4475d50799915835f301c2/hf_xet-1.1.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfe5700bc729be3d33d4e9a9b5cc17a951bf8c7ada7ba0c9198a6ab2053b7453", size = 3175859, upload-time = "2025-08-18T22:00:55.978Z" }, + { url = "https://files.pythonhosted.org/packages/4c/ed/34a193c9d1d72b7c3901b3b5153b1be9b2736b832692e1c3f167af537102/hf_xet-1.1.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:09e86514c3c4284ed8a57d6b0f3d089f9836a0af0a1ceb3c9dd664f1f3eaefef", size = 3074178, upload-time = "2025-08-18T22:00:54.147Z" }, + { url = "https://files.pythonhosted.org/packages/4a/1b/de6817b4bf65385280252dff5c9cceeedfbcb27ddb93923639323c1034a4/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4a9b99ab721d385b83f4fc8ee4e0366b0b59dce03b5888a86029cc0ca634efbf", size = 3238122, upload-time = "2025-08-18T22:01:00.546Z" }, + { url = "https://files.pythonhosted.org/packages/b7/13/874c85c7ed519ec101deb654f06703d9e5e68d34416730f64c4755ada36a/hf_xet-1.1.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:25b9d43333bbef39aeae1616789ec329c21401a7fe30969d538791076227b591", size = 3344325, upload-time = "2025-08-18T22:01:02.013Z" }, + { url = "https://files.pythonhosted.org/packages/9e/d3/0aaf279f4f3dea58e99401b92c31c0f752924ba0e6c7d7bb07b1dbd7f35e/hf_xet-1.1.8-cp37-abi3-win_amd64.whl", hash = "sha256:4171f31d87b13da4af1ed86c98cf763292e4720c088b4957cf9d564f92904ca9", size = 2801689, upload-time = "2025-08-18T22:01:04.81Z" }, ] [[package]] @@ -1499,7 +2016,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "0.33.0" +version = "0.34.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -1511,14 +2028,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/91/8a/1362d565fefabaa4185cf3ae842a98dbc5b35146f5694f7080f043a6952f/huggingface_hub-0.33.0.tar.gz", hash = "sha256:aa31f70d29439d00ff7a33837c03f1f9dd83971ce4e29ad664d63ffb17d3bb97", size = 426179, upload-time = "2025-06-11T17:08:07.913Z" } +sdist = { url = "https://files.pythonhosted.org/packages/45/c9/bdbe19339f76d12985bc03572f330a01a93c04dffecaaea3061bdd7fb892/huggingface_hub-0.34.4.tar.gz", hash = "sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c", size = 459768, upload-time = "2025-08-08T09:14:52.365Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/33/fb/53587a89fbc00799e4179796f51b3ad713c5de6bb680b2becb6d37c94649/huggingface_hub-0.33.0-py3-none-any.whl", hash = "sha256:e8668875b40c68f9929150d99727d39e5ebb8a05a98e4191b908dc7ded9074b3", size = 514799, upload-time = "2025-06-11T17:08:05.757Z" }, -] - -[package.optional-dependencies] -hf-xet = [ - { name = "hf-xet" }, + { url = "https://files.pythonhosted.org/packages/39/7b/bb06b061991107cd8783f300adff3e7b7f284e330fd82f507f2a1417b11d/huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a", size = 561452, upload-time = "2025-08-08T09:14:50.159Z" }, ] [[package]] @@ -1537,11 +2049,11 @@ wheels = [ [[package]] name = "identify" -version = "2.6.12" +version = "2.6.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/ca/ffbabe3635bb839aa36b3a893c91a9b0d368cb4d8073e03a12896970af82/identify-2.6.13.tar.gz", hash = "sha256:da8d6c828e773620e13bfa86ea601c5a5310ba4bcd65edf378198b56a1f9fb32", size = 99243, upload-time = 
"2025-08-09T19:35:00.6Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ce/461b60a3ee109518c055953729bf9ed089a04db895d47e95444071dcdef2/identify-2.6.13-py2.py3-none-any.whl", hash = "sha256:60381139b3ae39447482ecc406944190f690d4a2997f2584062089848361b33b", size = 99153, upload-time = "2025-08-09T19:34:59.1Z" }, ] [[package]] @@ -1553,47 +2065,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, ] -[[package]] -name = "ijson" -version = "3.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a3/4f/1cfeada63f5fce87536651268ddf5cca79b8b4bbb457aee4e45777964a0a/ijson-3.4.0.tar.gz", hash = "sha256:5f74dcbad9d592c428d3ca3957f7115a42689ee7ee941458860900236ae9bb13", size = 65782, upload-time = "2025-05-08T02:37:20.135Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/ec/317ee5b2d13e50448833ead3aa906659a32b376191f6abc2a7c6112d2b27/ijson-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:956b148f88259a80a9027ffbe2d91705fae0c004fbfba3e5a24028fbe72311a9", size = 87212, upload-time = "2025-05-08T02:35:51.835Z" }, - { url = "https://files.pythonhosted.org/packages/f8/43/b06c96ced30cacecc5d518f89b0fd1c98c294a30ff88848b70ed7b7f72a1/ijson-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:06b89960f5c721106394c7fba5760b3f67c515b8eb7d80f612388f5eca2f4621", size = 59175, upload-time = "2025-05-08T02:35:52.988Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/df/b4aeafb7ecde463130840ee9be36130823ec94a00525049bf700883378b8/ijson-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9a0bb591cf250dd7e9dfab69d634745a7f3272d31cfe879f9156e0a081fd97ee", size = 59011, upload-time = "2025-05-08T02:35:54.394Z" }, - { url = "https://files.pythonhosted.org/packages/e3/7c/a80b8e361641609507f62022089626d4b8067f0826f51e1c09e4ba86eba8/ijson-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e92de999977f4c6b660ffcf2b8d59604ccd531edcbfde05b642baf283e0de8", size = 146094, upload-time = "2025-05-08T02:35:55.601Z" }, - { url = "https://files.pythonhosted.org/packages/01/44/fa416347b9a802e3646c6ff377fc3278bd7d6106e17beb339514b6a3184e/ijson-3.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e9602157a5b869d44b6896e64f502c712a312fcde044c2e586fccb85d3e316e", size = 137903, upload-time = "2025-05-08T02:35:56.814Z" }, - { url = "https://files.pythonhosted.org/packages/24/c6/41a9ad4d42df50ff6e70fdce79b034f09b914802737ebbdc141153d8d791/ijson-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e83660edb931a425b7ff662eb49db1f10d30ca6d4d350e5630edbed098bc01", size = 148339, upload-time = "2025-05-08T02:35:58.595Z" }, - { url = "https://files.pythonhosted.org/packages/5f/6f/7d01efda415b8502dce67e067ed9e8a124f53e763002c02207e542e1a2f1/ijson-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:49bf8eac1c7b7913073865a859c215488461f7591b4fa6a33c14b51cb73659d0", size = 149383, upload-time = "2025-05-08T02:36:00.197Z" }, - { url = "https://files.pythonhosted.org/packages/95/6c/0d67024b9ecb57916c5e5ab0350251c9fe2f86dc9c8ca2b605c194bdad6a/ijson-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:160b09273cb42019f1811469508b0a057d19f26434d44752bde6f281da6d3f32", size = 141580, upload-time = "2025-05-08T02:36:01.998Z" }, - { url = 
"https://files.pythonhosted.org/packages/06/43/e10edcc1c6a3b619294de835e7678bfb3a1b8a75955f3689fd66a1e9e7b4/ijson-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2019ff4e6f354aa00c76c8591bd450899111c61f2354ad55cc127e2ce2492c44", size = 150280, upload-time = "2025-05-08T02:36:03.926Z" }, - { url = "https://files.pythonhosted.org/packages/07/84/1cbeee8e8190a1ebe6926569a92cf1fa80ddb380c129beb6f86559e1bb24/ijson-3.4.0-cp312-cp312-win32.whl", hash = "sha256:931c007bf6bb8330705429989b2deed6838c22b63358a330bf362b6e458ba0bf", size = 51512, upload-time = "2025-05-08T02:36:05.595Z" }, - { url = "https://files.pythonhosted.org/packages/66/13/530802bc391c95be6fe9f96e9aa427d94067e7c0b7da7a9092344dc44c4b/ijson-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:71523f2b64cb856a820223e94d23e88369f193017ecc789bb4de198cc9d349eb", size = 54081, upload-time = "2025-05-08T02:36:07.099Z" }, - { url = "https://files.pythonhosted.org/packages/77/b3/b1d2eb2745e5204ec7a25365a6deb7868576214feb5e109bce368fb692c9/ijson-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e8d96f88d75196a61c9d9443de2b72c2d4a7ba9456ff117b57ae3bba23a54256", size = 87216, upload-time = "2025-05-08T02:36:08.414Z" }, - { url = "https://files.pythonhosted.org/packages/b1/cd/cd6d340087617f8cc9bedbb21d974542fe2f160ed0126b8288d3499a469b/ijson-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c45906ce2c1d3b62f15645476fc3a6ca279549127f01662a39ca5ed334a00cf9", size = 59170, upload-time = "2025-05-08T02:36:09.604Z" }, - { url = "https://files.pythonhosted.org/packages/3e/4d/32d3a9903b488d3306e3c8288f6ee4217d2eea82728261db03a1045eb5d1/ijson-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4ab4bc2119b35c4363ea49f29563612237cae9413d2fbe54b223be098b97bc9e", size = 59013, upload-time = "2025-05-08T02:36:10.696Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/c8/db15465ab4b0b477cee5964c8bfc94bf8c45af8e27a23e1ad78d1926e587/ijson-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97b0a9b5a15e61dfb1f14921ea4e0dba39f3a650df6d8f444ddbc2b19b479ff1", size = 146564, upload-time = "2025-05-08T02:36:11.916Z" }, - { url = "https://files.pythonhosted.org/packages/c4/d8/0755545bc122473a9a434ab90e0f378780e603d75495b1ca3872de757873/ijson-3.4.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3047bb994dabedf11de11076ed1147a307924b6e5e2df6784fb2599c4ad8c60", size = 137917, upload-time = "2025-05-08T02:36:13.532Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c6/aeb89c8939ebe3f534af26c8c88000c5e870dbb6ae33644c21a4531f87d2/ijson-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68c83161b052e9f5dc8191acbc862bb1e63f8a35344cb5cd0db1afd3afd487a6", size = 148897, upload-time = "2025-05-08T02:36:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/be/0e/7ef6e9b372106f2682a4a32b3c65bf86bb471a1670e4dac242faee4a7d3f/ijson-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1eebd9b6c20eb1dffde0ae1f0fbb4aeacec2eb7b89adb5c7c0449fc9fd742760", size = 149711, upload-time = "2025-05-08T02:36:16.476Z" }, - { url = "https://files.pythonhosted.org/packages/d1/5d/9841c3ed75bcdabf19b3202de5f862a9c9c86ce5c7c9d95fa32347fdbf5f/ijson-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:13fb6d5c35192c541421f3ee81239d91fc15a8d8f26c869250f941f4b346a86c", size = 141691, upload-time = "2025-05-08T02:36:18.044Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d2/ce74e17218dba292e9be10a44ed0c75439f7958cdd263adb0b5b92d012d5/ijson-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:28b7196ff7b37c4897c547a28fa4876919696739fc91c1f347651c9736877c69", size = 150738, upload-time = "2025-05-08T02:36:19.483Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/43/dcc480f94453b1075c9911d4755b823f3ace275761bb37b40139f22109ca/ijson-3.4.0-cp313-cp313-win32.whl", hash = "sha256:3c2691d2da42629522140f77b99587d6f5010440d58d36616f33bc7bdc830cc3", size = 51512, upload-time = "2025-05-08T02:36:20.99Z" }, - { url = "https://files.pythonhosted.org/packages/35/dd/d8c5f15efd85ba51e6e11451ebe23d779361a9ec0d192064c2a8c3cdfcb8/ijson-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:c4554718c275a044c47eb3874f78f2c939f300215d9031e785a6711cc51b83fc", size = 54074, upload-time = "2025-05-08T02:36:22.075Z" }, - { url = "https://files.pythonhosted.org/packages/79/73/24ad8cd106203419c4d22bed627e02e281d66b83e91bc206a371893d0486/ijson-3.4.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:915a65e3f3c0eee2ea937bc62aaedb6c14cc1e8f0bb9f3f4fb5a9e2bbfa4b480", size = 91694, upload-time = "2025-05-08T02:36:23.289Z" }, - { url = "https://files.pythonhosted.org/packages/17/2d/f7f680984bcb7324a46a4c2df3bd73cf70faef0acfeb85a3f811abdfd590/ijson-3.4.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:afbe9748707684b6c5adc295c4fdcf27765b300aec4d484e14a13dca4e5c0afa", size = 61390, upload-time = "2025-05-08T02:36:24.42Z" }, - { url = "https://files.pythonhosted.org/packages/09/a1/f3ca7bab86f95bdb82494739e71d271410dfefce4590785d511669127145/ijson-3.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d823f8f321b4d8d5fa020d0a84f089fec5d52b7c0762430476d9f8bf95bbc1a9", size = 61140, upload-time = "2025-05-08T02:36:26.708Z" }, - { url = "https://files.pythonhosted.org/packages/51/79/dd340df3d4fc7771c95df29997956b92ed0570fe7b616d1792fea9ad93f2/ijson-3.4.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0a2c54f3becf76881188beefd98b484b1d3bd005769a740d5b433b089fa23", size = 214739, upload-time = "2025-05-08T02:36:27.973Z" }, - { url = 
"https://files.pythonhosted.org/packages/59/f0/85380b7f51d1f5fb7065d76a7b623e02feca920cc678d329b2eccc0011e0/ijson-3.4.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ced19a83ab09afa16257a0b15bc1aa888dbc555cb754be09d375c7f8d41051f2", size = 198338, upload-time = "2025-05-08T02:36:29.496Z" }, - { url = "https://files.pythonhosted.org/packages/a5/cd/313264cf2ec42e0f01d198c49deb7b6fadeb793b3685e20e738eb6b3fa13/ijson-3.4.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8100f9885eff1f38d35cef80ef759a1bbf5fc946349afa681bd7d0e681b7f1a0", size = 207515, upload-time = "2025-05-08T02:36:30.981Z" }, - { url = "https://files.pythonhosted.org/packages/12/94/bf14457aa87ea32641f2db577c9188ef4e4ae373478afef422b31fc7f309/ijson-3.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d7bcc3f7f21b0f703031ecd15209b1284ea51b2a329d66074b5261de3916c1eb", size = 210081, upload-time = "2025-05-08T02:36:32.403Z" }, - { url = "https://files.pythonhosted.org/packages/7d/b4/eaee39e290e40e52d665db9bd1492cfdce86bd1e47948e0440db209c6023/ijson-3.4.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2dcb190227b09dd171bdcbfe4720fddd574933c66314818dfb3960c8a6246a77", size = 199253, upload-time = "2025-05-08T02:36:33.861Z" }, - { url = "https://files.pythonhosted.org/packages/c5/9c/e09c7b9ac720a703ab115b221b819f149ed54c974edfff623c1e925e57da/ijson-3.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:eda4cfb1d49c6073a901735aaa62e39cb7ab47f3ad7bb184862562f776f1fa8a", size = 203816, upload-time = "2025-05-08T02:36:35.348Z" }, - { url = "https://files.pythonhosted.org/packages/7c/14/acd304f412e32d16a2c12182b9d78206bb0ae35354d35664f45db05c1b3b/ijson-3.4.0-cp313-cp313t-win32.whl", hash = "sha256:0772638efa1f3b72b51736833404f1cbd2f5beeb9c1a3d392e7d385b9160cba7", size = 53760, upload-time = "2025-05-08T02:36:36.608Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/24/93dd0a467191590a5ed1fc2b35842bca9d09900d001e00b0b497c0208ef6/ijson-3.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3d8a0d67f36e4fb97c61a724456ef0791504b16ce6f74917a31c2e92309bbeb9", size = 56948, upload-time = "2025-05-08T02:36:37.849Z" }, -] - [[package]] name = "imagesize" version = "1.4.1" @@ -1603,6 +2074,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl", hash = "sha256:0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b", size = 8769, upload-time = "2022-07-01T12:21:02.467Z" }, ] +[[package]] +name = "import-linter" +version = "2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "grimp" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/db/33/e3c29beb4d8a33cfacdbe2858a3a4533694a0c1d0c060daaa761eff6d929/import_linter-2.4.tar.gz", hash = "sha256:4888fde83dd18bdbecd57ea1a98a1f3d52c6b6507d700f89f8678b44306c0ab4", size = 29942, upload-time = "2025-08-15T06:57:23.423Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/11/2c108fc1138e506762db332c4a7ebc589cb379bc443939a81ec738b4cf73/import_linter-2.4-py3-none-any.whl", hash = "sha256:2ad6d5a164cdcd5ebdda4172cf0169f73dde1a8925ef7216672c321cd38f8499", size = 42355, upload-time = "2025-08-15T06:57:22.221Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.0" @@ -1624,19 +2109,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] -[[package]] -name = "inquirerpy" -version = "0.3.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = 
"pfzy" }, - { name = "prompt-toolkit" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/64/73/7570847b9da026e07053da3bbe2ac7ea6cde6bb2cbd3c7a5a950fa0ae40b/InquirerPy-0.3.4.tar.gz", hash = "sha256:89d2ada0111f337483cb41ae31073108b2ec1e618a49d7110b0d7ade89fc197e", size = 44431, upload-time = "2022-06-27T23:11:20.598Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/ff/3b59672c47c6284e8005b42e84ceba13864aa0f39f067c973d1af02f5d91/InquirerPy-0.3.4-py3-none-any.whl", hash = "sha256:c65fdfbac1fa00e3ee4fb10679f4d3ed7a012abf4833910e63c295827fe2a7d4", size = 67677, upload-time = "2022-06-27T23:11:17.723Z" }, -] - [[package]] name = "interegular" version = "0.3.3" @@ -1646,15 +2118,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/01/72d6472f80651673716d1deda2a5bbb633e563ecf94f4479da5519d69d25/interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c", size = 23635, upload-time = "2024-01-06T23:01:20.829Z" }, ] -[[package]] -name = "invoke" -version = "2.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/42/127e6d792884ab860defc3f4d80a8f9812e48ace584ffc5a346de58cdc6c/invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5", size = 299835, upload-time = "2023-07-12T18:05:17.998Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/66/7f8c48009c72d73bc6bbe6eb87ac838d6a526146f7dab14af671121eb379/invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820", size = 160274, upload-time = "2023-07-12T18:05:16.294Z" }, -] - [[package]] name = "itsdangerous" version = "2.2.0" @@ -1724,6 +2187,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/4a/4175a563579e884192ba6e81725fc0448b042024419be8d83aa8a80a3f44/jiter-0.10.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:3aa96f2abba33dc77f79b4cf791840230375f9534e5fac927ccceb58c5e604a5", size = 354213, upload-time = "2025-05-18T19:04:41.894Z" }, ] +[[package]] +name = "jmespath" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/2a/e867e8531cf3e36b41201936b7fa7ba7b5702dbef42922193f05c8976cd6/jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe", size = 25843, upload-time = "2022-06-17T18:00:12.224Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/31/b4/b9b800c45527aadd64d5b442f9b932b00648617eb5d63d2c7a6587b7cafc/jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980", size = 20256, upload-time = "2022-06-17T18:00:10.251Z" }, +] + [[package]] name = "joblib" version = "1.5.1" @@ -1735,7 +2207,7 @@ wheels = [ [[package]] name = "jsonschema" -version = "4.24.0" +version = "4.25.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -1743,9 +2215,9 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bf/d3/1cf5326b923a53515d8f3a2cd442e6d7e94fcc444716e879ea70a0ce3177/jsonschema-4.24.0.tar.gz", hash = "sha256:0b4e8069eb12aedfa881333004bccaec24ecef5a8a6a4b6df142b2cc9599d196", size = 353480, upload-time = "2025-05-26T18:48:10.459Z" } +sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/3d/023389198f69c722d039351050738d6755376c8fd343e91dc493ea485905/jsonschema-4.24.0-py3-none-any.whl", hash = 
"sha256:a462455f19f5faf404a7902952b6f0e3ce868f3ee09a359b05eca6673bd8412d", size = 88709, upload-time = "2025-05-26T18:48:08.417Z" }, + { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, ] [[package]] @@ -1762,53 +2234,74 @@ wheels = [ [[package]] name = "kiwisolver" -version = "1.4.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/82/59/7c91426a8ac292e1cdd53a63b6d9439abd573c875c3f92c146767dd33faf/kiwisolver-1.4.8.tar.gz", hash = "sha256:23d5f023bdc8c7e54eb65f03ca5d5bb25b601eac4d7f1a042888a1f45237987e", size = 97538, upload-time = "2024-12-24T18:30:51.519Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/aa/cea685c4ab647f349c3bc92d2daf7ae34c8e8cf405a6dcd3a497f58a2ac3/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d6af5e8815fd02997cb6ad9bbed0ee1e60014438ee1a5c2444c96f87b8843502", size = 124152, upload-time = "2024-12-24T18:29:16.85Z" }, - { url = "https://files.pythonhosted.org/packages/c5/0b/8db6d2e2452d60d5ebc4ce4b204feeb16176a851fd42462f66ade6808084/kiwisolver-1.4.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bade438f86e21d91e0cf5dd7c0ed00cda0f77c8c1616bd83f9fc157fa6760d31", size = 66555, upload-time = "2024-12-24T18:29:19.146Z" }, - { url = "https://files.pythonhosted.org/packages/60/26/d6a0db6785dd35d3ba5bf2b2df0aedc5af089962c6eb2cbf67a15b81369e/kiwisolver-1.4.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b83dc6769ddbc57613280118fb4ce3cd08899cc3369f7d0e0fab518a7cf37fdb", size = 65067, upload-time = "2024-12-24T18:29:20.096Z" }, - { url = 
"https://files.pythonhosted.org/packages/c9/ed/1d97f7e3561e09757a196231edccc1bcf59d55ddccefa2afc9c615abd8e0/kiwisolver-1.4.8-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:111793b232842991be367ed828076b03d96202c19221b5ebab421ce8bcad016f", size = 1378443, upload-time = "2024-12-24T18:29:22.843Z" }, - { url = "https://files.pythonhosted.org/packages/29/61/39d30b99954e6b46f760e6289c12fede2ab96a254c443639052d1b573fbc/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:257af1622860e51b1a9d0ce387bf5c2c4f36a90594cb9514f55b074bcc787cfc", size = 1472728, upload-time = "2024-12-24T18:29:24.463Z" }, - { url = "https://files.pythonhosted.org/packages/0c/3e/804163b932f7603ef256e4a715e5843a9600802bb23a68b4e08c8c0ff61d/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:69b5637c3f316cab1ec1c9a12b8c5f4750a4c4b71af9157645bf32830e39c03a", size = 1478388, upload-time = "2024-12-24T18:29:25.776Z" }, - { url = "https://files.pythonhosted.org/packages/8a/9e/60eaa75169a154700be74f875a4d9961b11ba048bef315fbe89cb6999056/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:782bb86f245ec18009890e7cb8d13a5ef54dcf2ebe18ed65f795e635a96a1c6a", size = 1413849, upload-time = "2024-12-24T18:29:27.202Z" }, - { url = "https://files.pythonhosted.org/packages/bc/b3/9458adb9472e61a998c8c4d95cfdfec91c73c53a375b30b1428310f923e4/kiwisolver-1.4.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc978a80a0db3a66d25767b03688f1147a69e6237175c0f4ffffaaedf744055a", size = 1475533, upload-time = "2024-12-24T18:29:28.638Z" }, - { url = "https://files.pythonhosted.org/packages/e4/7a/0a42d9571e35798de80aef4bb43a9b672aa7f8e58643d7bd1950398ffb0a/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:36dbbfd34838500a31f52c9786990d00150860e46cd5041386f217101350f0d3", size = 2268898, 
upload-time = "2024-12-24T18:29:30.368Z" }, - { url = "https://files.pythonhosted.org/packages/d9/07/1255dc8d80271400126ed8db35a1795b1a2c098ac3a72645075d06fe5c5d/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:eaa973f1e05131de5ff3569bbba7f5fd07ea0595d3870ed4a526d486fe57fa1b", size = 2425605, upload-time = "2024-12-24T18:29:33.151Z" }, - { url = "https://files.pythonhosted.org/packages/84/df/5a3b4cf13780ef6f6942df67b138b03b7e79e9f1f08f57c49957d5867f6e/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a66f60f8d0c87ab7f59b6fb80e642ebb29fec354a4dfad687ca4092ae69d04f4", size = 2375801, upload-time = "2024-12-24T18:29:34.584Z" }, - { url = "https://files.pythonhosted.org/packages/8f/10/2348d068e8b0f635c8c86892788dac7a6b5c0cb12356620ab575775aad89/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858416b7fb777a53f0c59ca08190ce24e9abbd3cffa18886a5781b8e3e26f65d", size = 2520077, upload-time = "2024-12-24T18:29:36.138Z" }, - { url = "https://files.pythonhosted.org/packages/32/d8/014b89fee5d4dce157d814303b0fce4d31385a2af4c41fed194b173b81ac/kiwisolver-1.4.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:085940635c62697391baafaaeabdf3dd7a6c3643577dde337f4d66eba021b2b8", size = 2338410, upload-time = "2024-12-24T18:29:39.991Z" }, - { url = "https://files.pythonhosted.org/packages/bd/72/dfff0cc97f2a0776e1c9eb5bef1ddfd45f46246c6533b0191887a427bca5/kiwisolver-1.4.8-cp312-cp312-win_amd64.whl", hash = "sha256:01c3d31902c7db5fb6182832713d3b4122ad9317c2c5877d0539227d96bb2e50", size = 71853, upload-time = "2024-12-24T18:29:42.006Z" }, - { url = "https://files.pythonhosted.org/packages/dc/85/220d13d914485c0948a00f0b9eb419efaf6da81b7d72e88ce2391f7aed8d/kiwisolver-1.4.8-cp312-cp312-win_arm64.whl", hash = "sha256:a3c44cb68861de93f0c4a8175fbaa691f0aa22550c331fefef02b618a9dcb476", size = 65424, upload-time = "2024-12-24T18:29:44.38Z" }, - { url = 
"https://files.pythonhosted.org/packages/79/b3/e62464a652f4f8cd9006e13d07abad844a47df1e6537f73ddfbf1bc997ec/kiwisolver-1.4.8-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1c8ceb754339793c24aee1c9fb2485b5b1f5bb1c2c214ff13368431e51fc9a09", size = 124156, upload-time = "2024-12-24T18:29:45.368Z" }, - { url = "https://files.pythonhosted.org/packages/8d/2d/f13d06998b546a2ad4f48607a146e045bbe48030774de29f90bdc573df15/kiwisolver-1.4.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a62808ac74b5e55a04a408cda6156f986cefbcf0ada13572696b507cc92fa1", size = 66555, upload-time = "2024-12-24T18:29:46.37Z" }, - { url = "https://files.pythonhosted.org/packages/59/e3/b8bd14b0a54998a9fd1e8da591c60998dc003618cb19a3f94cb233ec1511/kiwisolver-1.4.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68269e60ee4929893aad82666821aaacbd455284124817af45c11e50a4b42e3c", size = 65071, upload-time = "2024-12-24T18:29:47.333Z" }, - { url = "https://files.pythonhosted.org/packages/f0/1c/6c86f6d85ffe4d0ce04228d976f00674f1df5dc893bf2dd4f1928748f187/kiwisolver-1.4.8-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34d142fba9c464bc3bbfeff15c96eab0e7310343d6aefb62a79d51421fcc5f1b", size = 1378053, upload-time = "2024-12-24T18:29:49.636Z" }, - { url = "https://files.pythonhosted.org/packages/4e/b9/1c6e9f6dcb103ac5cf87cb695845f5fa71379021500153566d8a8a9fc291/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ddc373e0eef45b59197de815b1b28ef89ae3955e7722cc9710fb91cd77b7f47", size = 1472278, upload-time = "2024-12-24T18:29:51.164Z" }, - { url = "https://files.pythonhosted.org/packages/ee/81/aca1eb176de671f8bda479b11acdc42c132b61a2ac861c883907dde6debb/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:77e6f57a20b9bd4e1e2cedda4d0b986ebd0216236f0106e55c28aea3d3d69b16", size = 1478139, upload-time = "2024-12-24T18:29:52.594Z" }, - { url = 
"https://files.pythonhosted.org/packages/49/f4/e081522473671c97b2687d380e9e4c26f748a86363ce5af48b4a28e48d06/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08e77738ed7538f036cd1170cbed942ef749137b1311fa2bbe2a7fda2f6bf3cc", size = 1413517, upload-time = "2024-12-24T18:29:53.941Z" }, - { url = "https://files.pythonhosted.org/packages/8f/e9/6a7d025d8da8c4931522922cd706105aa32b3291d1add8c5427cdcd66e63/kiwisolver-1.4.8-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a5ce1e481a74b44dd5e92ff03ea0cb371ae7a0268318e202be06c8f04f4f1246", size = 1474952, upload-time = "2024-12-24T18:29:56.523Z" }, - { url = "https://files.pythonhosted.org/packages/82/13/13fa685ae167bee5d94b415991c4fc7bb0a1b6ebea6e753a87044b209678/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:fc2ace710ba7c1dfd1a3b42530b62b9ceed115f19a1656adefce7b1782a37794", size = 2269132, upload-time = "2024-12-24T18:29:57.989Z" }, - { url = "https://files.pythonhosted.org/packages/ef/92/bb7c9395489b99a6cb41d502d3686bac692586db2045adc19e45ee64ed23/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3452046c37c7692bd52b0e752b87954ef86ee2224e624ef7ce6cb21e8c41cc1b", size = 2425997, upload-time = "2024-12-24T18:29:59.393Z" }, - { url = "https://files.pythonhosted.org/packages/ed/12/87f0e9271e2b63d35d0d8524954145837dd1a6c15b62a2d8c1ebe0f182b4/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:7e9a60b50fe8b2ec6f448fe8d81b07e40141bfced7f896309df271a0b92f80f3", size = 2376060, upload-time = "2024-12-24T18:30:01.338Z" }, - { url = "https://files.pythonhosted.org/packages/02/6e/c8af39288edbce8bf0fa35dee427b082758a4b71e9c91ef18fa667782138/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:918139571133f366e8362fa4a297aeba86c7816b7ecf0bc79168080e2bd79957", size = 2520471, upload-time = "2024-12-24T18:30:04.574Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/78/df381bc7b26e535c91469f77f16adcd073beb3e2dd25042efd064af82323/kiwisolver-1.4.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e063ef9f89885a1d68dd8b2e18f5ead48653176d10a0e324e3b0030e3a69adeb", size = 2338793, upload-time = "2024-12-24T18:30:06.25Z" }, - { url = "https://files.pythonhosted.org/packages/d0/dc/c1abe38c37c071d0fc71c9a474fd0b9ede05d42f5a458d584619cfd2371a/kiwisolver-1.4.8-cp313-cp313-win_amd64.whl", hash = "sha256:a17b7c4f5b2c51bb68ed379defd608a03954a1845dfed7cc0117f1cc8a9b7fd2", size = 71855, upload-time = "2024-12-24T18:30:07.535Z" }, - { url = "https://files.pythonhosted.org/packages/a0/b6/21529d595b126ac298fdd90b705d87d4c5693de60023e0efcb4f387ed99e/kiwisolver-1.4.8-cp313-cp313-win_arm64.whl", hash = "sha256:3cd3bc628b25f74aedc6d374d5babf0166a92ff1317f46267f12d2ed54bc1d30", size = 65430, upload-time = "2024-12-24T18:30:08.504Z" }, - { url = "https://files.pythonhosted.org/packages/34/bd/b89380b7298e3af9b39f49334e3e2a4af0e04819789f04b43d560516c0c8/kiwisolver-1.4.8-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:370fd2df41660ed4e26b8c9d6bbcad668fbe2560462cba151a721d49e5b6628c", size = 126294, upload-time = "2024-12-24T18:30:09.508Z" }, - { url = "https://files.pythonhosted.org/packages/83/41/5857dc72e5e4148eaac5aa76e0703e594e4465f8ab7ec0fc60e3a9bb8fea/kiwisolver-1.4.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:84a2f830d42707de1d191b9490ac186bf7997a9495d4e9072210a1296345f7dc", size = 67736, upload-time = "2024-12-24T18:30:11.039Z" }, - { url = "https://files.pythonhosted.org/packages/e1/d1/be059b8db56ac270489fb0b3297fd1e53d195ba76e9bbb30e5401fa6b759/kiwisolver-1.4.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7a3ad337add5148cf51ce0b55642dc551c0b9d6248458a757f98796ca7348712", size = 66194, upload-time = "2024-12-24T18:30:14.886Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/83/4b73975f149819eb7dcf9299ed467eba068ecb16439a98990dcb12e63fdd/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7506488470f41169b86d8c9aeff587293f530a23a23a49d6bc64dab66bedc71e", size = 1465942, upload-time = "2024-12-24T18:30:18.927Z" }, - { url = "https://files.pythonhosted.org/packages/c7/2c/30a5cdde5102958e602c07466bce058b9d7cb48734aa7a4327261ac8e002/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f0121b07b356a22fb0414cec4666bbe36fd6d0d759db3d37228f496ed67c880", size = 1595341, upload-time = "2024-12-24T18:30:22.102Z" }, - { url = "https://files.pythonhosted.org/packages/ff/9b/1e71db1c000385aa069704f5990574b8244cce854ecd83119c19e83c9586/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d6d6bd87df62c27d4185de7c511c6248040afae67028a8a22012b010bc7ad062", size = 1598455, upload-time = "2024-12-24T18:30:24.947Z" }, - { url = "https://files.pythonhosted.org/packages/85/92/c8fec52ddf06231b31cbb779af77e99b8253cd96bd135250b9498144c78b/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:291331973c64bb9cce50bbe871fb2e675c4331dab4f31abe89f175ad7679a4d7", size = 1522138, upload-time = "2024-12-24T18:30:26.286Z" }, - { url = "https://files.pythonhosted.org/packages/0b/51/9eb7e2cd07a15d8bdd976f6190c0164f92ce1904e5c0c79198c4972926b7/kiwisolver-1.4.8-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:893f5525bb92d3d735878ec00f781b2de998333659507d29ea4466208df37bed", size = 1582857, upload-time = "2024-12-24T18:30:28.86Z" }, - { url = "https://files.pythonhosted.org/packages/0f/95/c5a00387a5405e68ba32cc64af65ce881a39b98d73cc394b24143bebc5b8/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b47a465040146981dc9db8647981b8cb96366fbc8d452b031e4f8fdffec3f26d", size = 2293129, 
upload-time = "2024-12-24T18:30:30.34Z" }, - { url = "https://files.pythonhosted.org/packages/44/83/eeb7af7d706b8347548313fa3a3a15931f404533cc54fe01f39e830dd231/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:99cea8b9dd34ff80c521aef46a1dddb0dcc0283cf18bde6d756f1e6f31772165", size = 2421538, upload-time = "2024-12-24T18:30:33.334Z" }, - { url = "https://files.pythonhosted.org/packages/05/f9/27e94c1b3eb29e6933b6986ffc5fa1177d2cd1f0c8efc5f02c91c9ac61de/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:151dffc4865e5fe6dafce5480fab84f950d14566c480c08a53c663a0020504b6", size = 2390661, upload-time = "2024-12-24T18:30:34.939Z" }, - { url = "https://files.pythonhosted.org/packages/d9/d4/3c9735faa36ac591a4afcc2980d2691000506050b7a7e80bcfe44048daa7/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:577facaa411c10421314598b50413aa1ebcf5126f704f1e5d72d7e4e9f020d90", size = 2546710, upload-time = "2024-12-24T18:30:37.281Z" }, - { url = "https://files.pythonhosted.org/packages/4c/fa/be89a49c640930180657482a74970cdcf6f7072c8d2471e1babe17a222dc/kiwisolver-1.4.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:be4816dc51c8a471749d664161b434912eee82f2ea66bd7628bd14583a833e85", size = 2349213, upload-time = "2024-12-24T18:30:40.019Z" }, +version = "1.4.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, 
+ { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, + { url = "https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" }, + { url = "https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" }, + { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" }, + { url = "https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" }, + { url = "https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" }, + { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" }, + { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" }, + { url = 
"https://files.pythonhosted.org/packages/31/c1/c2686cda909742ab66c7388e9a1a8521a59eb89f8bcfbee28fc980d07e24/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8", size = 123681, upload-time = "2025-08-10T21:26:26.725Z" }, + { url = "https://files.pythonhosted.org/packages/ca/f0/f44f50c9f5b1a1860261092e3bc91ecdc9acda848a8b8c6abfda4a24dd5c/kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2", size = 66464, upload-time = "2025-08-10T21:26:27.733Z" }, + { url = "https://files.pythonhosted.org/packages/2d/7a/9d90a151f558e29c3936b8a47ac770235f436f2120aca41a6d5f3d62ae8d/kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f", size = 64961, upload-time = "2025-08-10T21:26:28.729Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e9/f218a2cb3a9ffbe324ca29a9e399fa2d2866d7f348ec3a88df87fc248fc5/kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098", size = 1474607, upload-time = "2025-08-10T21:26:29.798Z" }, + { url = "https://files.pythonhosted.org/packages/d9/28/aac26d4c882f14de59041636292bc838db8961373825df23b8eeb807e198/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed", size = 1276546, upload-time = "2025-08-10T21:26:31.401Z" }, + { url = "https://files.pythonhosted.org/packages/8b/ad/8bfc1c93d4cc565e5069162f610ba2f48ff39b7de4b5b8d93f69f30c4bed/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525", size = 1294482, upload-time = "2025-08-10T21:26:32.721Z" }, + { url = 
"https://files.pythonhosted.org/packages/da/f1/6aca55ff798901d8ce403206d00e033191f63d82dd708a186e0ed2067e9c/kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78", size = 1343720, upload-time = "2025-08-10T21:26:34.032Z" }, + { url = "https://files.pythonhosted.org/packages/d1/91/eed031876c595c81d90d0f6fc681ece250e14bf6998c3d7c419466b523b7/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b", size = 2224907, upload-time = "2025-08-10T21:26:35.824Z" }, + { url = "https://files.pythonhosted.org/packages/e9/ec/4d1925f2e49617b9cca9c34bfa11adefad49d00db038e692a559454dfb2e/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799", size = 2321334, upload-time = "2025-08-10T21:26:37.534Z" }, + { url = "https://files.pythonhosted.org/packages/43/cb/450cd4499356f68802750c6ddc18647b8ea01ffa28f50d20598e0befe6e9/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3", size = 2488313, upload-time = "2025-08-10T21:26:39.191Z" }, + { url = "https://files.pythonhosted.org/packages/71/67/fc76242bd99f885651128a5d4fa6083e5524694b7c88b489b1b55fdc491d/kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c", size = 2291970, upload-time = "2025-08-10T21:26:40.828Z" }, + { url = "https://files.pythonhosted.org/packages/75/bd/f1a5d894000941739f2ae1b65a32892349423ad49c2e6d0771d0bad3fae4/kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d", size = 73894, upload-time = "2025-08-10T21:26:42.33Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/38/dce480814d25b99a391abbddadc78f7c117c6da34be68ca8b02d5848b424/kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2", size = 64995, upload-time = "2025-08-10T21:26:43.889Z" }, + { url = "https://files.pythonhosted.org/packages/e2/37/7d218ce5d92dadc5ebdd9070d903e0c7cf7edfe03f179433ac4d13ce659c/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1", size = 126510, upload-time = "2025-08-10T21:26:44.915Z" }, + { url = "https://files.pythonhosted.org/packages/23/b0/e85a2b48233daef4b648fb657ebbb6f8367696a2d9548a00b4ee0eb67803/kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1", size = 67903, upload-time = "2025-08-10T21:26:45.934Z" }, + { url = "https://files.pythonhosted.org/packages/44/98/f2425bc0113ad7de24da6bb4dae1343476e95e1d738be7c04d31a5d037fd/kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11", size = 66402, upload-time = "2025-08-10T21:26:47.101Z" }, + { url = "https://files.pythonhosted.org/packages/98/d8/594657886df9f34c4177cc353cc28ca7e6e5eb562d37ccc233bff43bbe2a/kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c", size = 1582135, upload-time = "2025-08-10T21:26:48.665Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c6/38a115b7170f8b306fc929e166340c24958347308ea3012c2b44e7e295db/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197", size = 1389409, upload-time = "2025-08-10T21:26:50.335Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/3b/e04883dace81f24a568bcee6eb3001da4ba05114afa622ec9b6fafdc1f5e/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c", size = 1401763, upload-time = "2025-08-10T21:26:51.867Z" }, + { url = "https://files.pythonhosted.org/packages/9f/80/20ace48e33408947af49d7d15c341eaee69e4e0304aab4b7660e234d6288/kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185", size = 1453643, upload-time = "2025-08-10T21:26:53.592Z" }, + { url = "https://files.pythonhosted.org/packages/64/31/6ce4380a4cd1f515bdda976a1e90e547ccd47b67a1546d63884463c92ca9/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748", size = 2330818, upload-time = "2025-08-10T21:26:55.051Z" }, + { url = "https://files.pythonhosted.org/packages/fa/e9/3f3fcba3bcc7432c795b82646306e822f3fd74df0ee81f0fa067a1f95668/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64", size = 2419963, upload-time = "2025-08-10T21:26:56.421Z" }, + { url = "https://files.pythonhosted.org/packages/99/43/7320c50e4133575c66e9f7dadead35ab22d7c012a3b09bb35647792b2a6d/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff", size = 2594639, upload-time = "2025-08-10T21:26:57.882Z" }, + { url = "https://files.pythonhosted.org/packages/65/d6/17ae4a270d4a987ef8a385b906d2bdfc9fce502d6dc0d3aea865b47f548c/kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07", size = 2391741, upload-time = "2025-08-10T21:26:59.237Z" }, + { url = 
"https://files.pythonhosted.org/packages/2a/8f/8f6f491d595a9e5912971f3f863d81baddccc8a4d0c3749d6a0dd9ffc9df/kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c", size = 68646, upload-time = "2025-08-10T21:27:00.52Z" }, + { url = "https://files.pythonhosted.org/packages/6b/32/6cc0fbc9c54d06c2969faa9c1d29f5751a2e51809dd55c69055e62d9b426/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386", size = 123806, upload-time = "2025-08-10T21:27:01.537Z" }, + { url = "https://files.pythonhosted.org/packages/b2/dd/2bfb1d4a4823d92e8cbb420fe024b8d2167f72079b3bb941207c42570bdf/kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552", size = 66605, upload-time = "2025-08-10T21:27:03.335Z" }, + { url = "https://files.pythonhosted.org/packages/f7/69/00aafdb4e4509c2ca6064646cba9cd4b37933898f426756adb2cb92ebbed/kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3", size = 64925, upload-time = "2025-08-10T21:27:04.339Z" }, + { url = "https://files.pythonhosted.org/packages/43/dc/51acc6791aa14e5cb6d8a2e28cefb0dc2886d8862795449d021334c0df20/kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58", size = 1472414, upload-time = "2025-08-10T21:27:05.437Z" }, + { url = "https://files.pythonhosted.org/packages/3d/bb/93fa64a81db304ac8a246f834d5094fae4b13baf53c839d6bb6e81177129/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4", size = 1281272, upload-time = "2025-08-10T21:27:07.063Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/e6/6df102916960fb8d05069d4bd92d6d9a8202d5a3e2444494e7cd50f65b7a/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df", size = 1298578, upload-time = "2025-08-10T21:27:08.452Z" }, + { url = "https://files.pythonhosted.org/packages/7c/47/e142aaa612f5343736b087864dbaebc53ea8831453fb47e7521fa8658f30/kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6", size = 1345607, upload-time = "2025-08-10T21:27:10.125Z" }, + { url = "https://files.pythonhosted.org/packages/54/89/d641a746194a0f4d1a3670fb900d0dbaa786fb98341056814bc3f058fa52/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5", size = 2230150, upload-time = "2025-08-10T21:27:11.484Z" }, + { url = "https://files.pythonhosted.org/packages/aa/6b/5ee1207198febdf16ac11f78c5ae40861b809cbe0e6d2a8d5b0b3044b199/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf", size = 2325979, upload-time = "2025-08-10T21:27:12.917Z" }, + { url = "https://files.pythonhosted.org/packages/fc/ff/b269eefd90f4ae14dcc74973d5a0f6d28d3b9bb1afd8c0340513afe6b39a/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5", size = 2491456, upload-time = "2025-08-10T21:27:14.353Z" }, + { url = "https://files.pythonhosted.org/packages/fc/d4/10303190bd4d30de547534601e259a4fbf014eed94aae3e5521129215086/kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce", size = 2294621, upload-time = "2025-08-10T21:27:15.808Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/e0/a9a90416fce5c0be25742729c2ea52105d62eda6c4be4d803c2a7be1fa50/kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7", size = 75417, upload-time = "2025-08-10T21:27:17.436Z" }, + { url = "https://files.pythonhosted.org/packages/1f/10/6949958215b7a9a264299a7db195564e87900f709db9245e4ebdd3c70779/kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c", size = 66582, upload-time = "2025-08-10T21:27:18.436Z" }, + { url = "https://files.pythonhosted.org/packages/ec/79/60e53067903d3bc5469b369fe0dfc6b3482e2133e85dae9daa9527535991/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548", size = 126514, upload-time = "2025-08-10T21:27:19.465Z" }, + { url = "https://files.pythonhosted.org/packages/25/d1/4843d3e8d46b072c12a38c97c57fab4608d36e13fe47d47ee96b4d61ba6f/kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d", size = 67905, upload-time = "2025-08-10T21:27:20.51Z" }, + { url = "https://files.pythonhosted.org/packages/8c/ae/29ffcbd239aea8b93108de1278271ae764dfc0d803a5693914975f200596/kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c", size = 66399, upload-time = "2025-08-10T21:27:21.496Z" }, + { url = "https://files.pythonhosted.org/packages/a1/ae/d7ba902aa604152c2ceba5d352d7b62106bedbccc8e95c3934d94472bfa3/kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122", size = 1582197, upload-time = "2025-08-10T21:27:22.604Z" }, + { url = 
"https://files.pythonhosted.org/packages/f2/41/27c70d427eddb8bc7e4f16420a20fefc6f480312122a59a959fdfe0445ad/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64", size = 1390125, upload-time = "2025-08-10T21:27:24.036Z" }, + { url = "https://files.pythonhosted.org/packages/41/42/b3799a12bafc76d962ad69083f8b43b12bf4fe78b097b12e105d75c9b8f1/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134", size = 1402612, upload-time = "2025-08-10T21:27:25.773Z" }, + { url = "https://files.pythonhosted.org/packages/d2/b5/a210ea073ea1cfaca1bb5c55a62307d8252f531beb364e18aa1e0888b5a0/kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370", size = 1453990, upload-time = "2025-08-10T21:27:27.089Z" }, + { url = "https://files.pythonhosted.org/packages/5f/ce/a829eb8c033e977d7ea03ed32fb3c1781b4fa0433fbadfff29e39c676f32/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21", size = 2331601, upload-time = "2025-08-10T21:27:29.343Z" }, + { url = "https://files.pythonhosted.org/packages/e0/4b/b5e97eb142eb9cd0072dacfcdcd31b1c66dc7352b0f7c7255d339c0edf00/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a", size = 2422041, upload-time = "2025-08-10T21:27:30.754Z" }, + { url = "https://files.pythonhosted.org/packages/40/be/8eb4cd53e1b85ba4edc3a9321666f12b83113a178845593307a3e7891f44/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f", size = 2594897, upload-time = "2025-08-10T21:27:32.803Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/dd/841e9a66c4715477ea0abc78da039832fbb09dac5c35c58dc4c41a407b8a/kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369", size = 2391835, upload-time = "2025-08-10T21:27:34.23Z" }, + { url = "https://files.pythonhosted.org/packages/0c/28/4b2e5c47a0da96896fdfdb006340ade064afa1e63675d01ea5ac222b6d52/kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891", size = 79988, upload-time = "2025-08-10T21:27:35.587Z" }, + { url = "https://files.pythonhosted.org/packages/80/be/3578e8afd18c88cdf9cb4cffde75a96d2be38c5a903f1ed0ceec061bd09e/kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32", size = 70260, upload-time = "2025-08-10T21:27:36.606Z" }, ] [[package]] @@ -1822,96 +2315,34 @@ wheels = [ [[package]] name = "latex2sympy2-extended" -version = "1.10.1" +version = "1.10.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "antlr4-python3-runtime" }, { name = "sympy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/14/8d/de5cc53dcf8a98396094b81a6478eb7234ef2e5c5fa4900dcc335d3b7fc2/latex2sympy2_extended-1.10.1.tar.gz", hash = "sha256:e365f0cb8807296c9d1f1661be926859bebd922a6139ce4be1e6572ca183ca57", size = 205808, upload-time = "2025-02-27T16:02:48.124Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/de/472f9115c14c6f6d8a5889cabe3418283d708bde62ce00402c29441deed4/latex2sympy2_extended-1.10.2.tar.gz", hash = "sha256:41a517ffcc5a140e910a7d1646ce6ff440817e5f9d48fc8279d88bd0925bc389", size = 206188, upload-time = "2025-07-02T15:26:06.225Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/eb/a5e8b06b924b4149cf498e1598116bad1e91ab23046c2dfc2c498154d393/latex2sympy2_extended-1.10.1-py3-none-any.whl", hash = 
"sha256:917a23e8f3b6edea88a56978fbbe87ed9fca4197f8277646be57b4660710347c", size = 207460, upload-time = "2025-02-27T16:02:45.941Z" }, + { url = "https://files.pythonhosted.org/packages/ab/60/dfbbf40e3a371388c0e03ff65b01319b7d4023e883df6d7261125772ffdc/latex2sympy2_extended-1.10.2-py3-none-any.whl", hash = "sha256:f910442c5b02a466c1046f47d05cc5285181068b882399281f30102715337fb7", size = 207855, upload-time = "2025-07-02T15:26:04.88Z" }, ] [[package]] -name = "libcst" -version = "1.8.2" +name = "liger-kernel" +version = "0.6.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "pyyaml", marker = "python_full_version < '3.13'" }, - { name = "pyyaml-ft", marker = "python_full_version >= '3.13'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin') or sys_platform == 'win32'" }, + { name = "triton", version = "3.4.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "triton", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux') or sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/89/aa/b52d195b167958fe1bd106a260f64cc80ec384f6ac2a9cda874d8803df06/libcst-1.8.2.tar.gz", hash = "sha256:66e82cedba95a6176194a817be4232c720312f8be6d2c8f3847f3317d95a0c7f", size = 881534, upload-time = "2025-06-13T20:56:37.915Z" } +sdist = { url = "https://files.pythonhosted.org/packages/31/23/be0b4dcac42d77f99406c906567cde22a7a3d71b3f3ffdfda2ac6153ec36/liger_kernel-0.6.2.tar.gz", hash = "sha256:5c5bcffffa769bc26ae838f5a4954170dd5cacde036abb1b383039f39fa5fd69", size = 3679495, upload-time = "2025-08-22T00:15:28.456Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/31/2d/8726bf8ea8252e8fd1e48980753eef5449622c5f6cf731102bc43dcdc2c6/libcst-1.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2e8c1dfa854e700fcf6cd79b2796aa37d55697a74646daf5ea47c7c764bac31c", size = 2185942, upload-time = "2025-06-13T20:55:26.105Z" }, - { url = "https://files.pythonhosted.org/packages/99/b3/565d24db8daed66eae7653c1fc1bc97793d49d5d3bcef530450ee8da882c/libcst-1.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b5c57a3c1976c365678eb0730bcb140d40510990cb77df9a91bb5c41d587ba6", size = 2072622, upload-time = "2025-06-13T20:55:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d6/5a433e8a58eeb5c5d46635cfe958d0605f598d87977d4560484e3662d438/libcst-1.8.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:0f23409add2aaebbb6d8e881babab43c2d979f051b8bd8aed5fe779ea180a4e8", size = 2402738, upload-time = "2025-06-13T20:55:29.539Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/0dd752c1880b570118fa91ac127589e6cf577ddcb2eef1aaf8b81ecc3f79/libcst-1.8.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b88e9104c456590ad0ef0e82851d4fc03e9aa9d621fa8fdd4cd0907152a825ae", size = 2219932, upload-time = "2025-06-13T20:55:31.17Z" }, - { url = "https://files.pythonhosted.org/packages/42/bc/fceae243c6a329477ac6d4edb887bcaa2ae7a3686158d8d9b9abb3089c37/libcst-1.8.2-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5ba3ea570c8fb6fc44f71aa329edc7c668e2909311913123d0d7ab8c65fc357", size = 2191891, upload-time = "2025-06-13T20:55:33.066Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7d/eb341bdc11f1147e7edeccffd0f2f785eff014e72134f5e46067472012b0/libcst-1.8.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:460fcf3562f078781e1504983cb11909eb27a1d46eaa99e65c4b0fafdc298298", size = 2311927, upload-time = "2025-06-13T20:55:34.614Z" }, - { url = 
"https://files.pythonhosted.org/packages/d8/19/78bfc7aa5a542574d2ab0768210d084901dec5fc373103ca119905408cf2/libcst-1.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1381ddbd1066d543e05d580c15beacf671e1469a0b2adb6dba58fec311f4eed", size = 2281098, upload-time = "2025-06-13T20:55:36.089Z" }, - { url = "https://files.pythonhosted.org/packages/83/37/a41788a72dc06ed3566606f7cf50349c9918cee846eeae45d1bac03d54c2/libcst-1.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a70e40ce7600e1b32e293bb9157e9de3b69170e2318ccb219102f1abb826c94a", size = 2387649, upload-time = "2025-06-13T20:55:37.797Z" }, - { url = "https://files.pythonhosted.org/packages/bb/df/7a49576c9fd55cdfd8bcfb725273aa4ee7dc41e87609f3451a4901d68057/libcst-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:3ece08ba778b6eeea74d9c705e9af2d1b4e915e9bc6de67ad173b962e575fcc0", size = 2094574, upload-time = "2025-06-13T20:55:39.833Z" }, - { url = "https://files.pythonhosted.org/packages/29/60/27381e194d2af08bfd0fed090c905b2732907b69da48d97d86c056d70790/libcst-1.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:5efd1bf6ee5840d1b0b82ec8e0b9c64f182fa5a7c8aad680fbd918c4fa3826e0", size = 1984568, upload-time = "2025-06-13T20:55:41.511Z" }, - { url = "https://files.pythonhosted.org/packages/11/9c/e3d4c7f1eb5c23907f905f84a4da271b60cd15b746ac794d42ea18bb105e/libcst-1.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08e9dca4ab6f8551794ce7ec146f86def6a82da41750cbed2c07551345fa10d3", size = 2185848, upload-time = "2025-06-13T20:55:43.653Z" }, - { url = "https://files.pythonhosted.org/packages/59/e0/635cbb205d42fd296c01ab5cd1ba485b0aee92bffe061de587890c81f1bf/libcst-1.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8310521f2ccb79b5c4345750d475b88afa37bad930ab5554735f85ad5e3add30", size = 2072510, upload-time = "2025-06-13T20:55:45.287Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/45/8911cfe9413fd690a024a1ff2c8975f060dd721160178679d3f6a21f939e/libcst-1.8.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:da2d8b008aff72acd5a4a588491abdda1b446f17508e700f26df9be80d8442ae", size = 2403226, upload-time = "2025-06-13T20:55:46.927Z" }, - { url = "https://files.pythonhosted.org/packages/38/83/819d2b1b1fd870ad34ce4f34ec68704ca69bf48ef2d7665483115f267ec4/libcst-1.8.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be821d874ce8b26cbadd7277fa251a9b37f6d2326f8b5682b6fc8966b50a3a59", size = 2220669, upload-time = "2025-06-13T20:55:48.597Z" }, - { url = "https://files.pythonhosted.org/packages/d4/2f/2c4742bf834f88a9803095915c4f41cafefb7b04bde66ea86f74668b4b7b/libcst-1.8.2-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f74b0bc7378ad5afcf25ac9d0367b4dbba50f6f6468faa41f5dfddcf8bf9c0f8", size = 2191919, upload-time = "2025-06-13T20:55:50.092Z" }, - { url = "https://files.pythonhosted.org/packages/64/f4/107e13815f1ee5aad642d4eb4671c0273ee737f3832e3dbca9603b39f8d9/libcst-1.8.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b68ea4a6018abfea1f68d50f74de7d399172684c264eb09809023e2c8696fc23", size = 2311965, upload-time = "2025-06-13T20:55:51.974Z" }, - { url = "https://files.pythonhosted.org/packages/03/63/2948b6e4be367ad375d273a8ad00df573029cffe5ac8f6c09398c250de5b/libcst-1.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e264307ec49b2c72480422abafe80457f90b4e6e693b7ddf8a23d24b5c24001", size = 2281704, upload-time = "2025-06-13T20:55:54.036Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d3/590cde9c8c386d5f4f05fdef3394c437ea51060478a5141ff4a1f289e747/libcst-1.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5d5519962ce7c72d81888fb0c09e58e308ba4c376e76bcd853b48151063d6a8", size = 2387511, upload-time = "2025-06-13T20:55:55.538Z" }, - { url = 
"https://files.pythonhosted.org/packages/96/3d/ba5e36c663028043fc607dc33e5c390c7f73136fb15a890fb3710ee9d158/libcst-1.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:b62aa11d6b74ed5545e58ac613d3f63095e5fd0254b3e0d1168fda991b9a6b41", size = 2094526, upload-time = "2025-06-13T20:55:57.486Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/530ca3b972dddad562f266c81190bea29376f8ba70054ea7b45b114504cd/libcst-1.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9c2bd4ac288a9cdb7ffc3229a9ce8027a66a3fd3f2ab9e13da60f5fbfe91f3b2", size = 1984627, upload-time = "2025-06-13T20:55:59.017Z" }, - { url = "https://files.pythonhosted.org/packages/19/9f/491f7b8d9d93444cd9bf711156ee1f122c38d25b903599e363d669acc8ab/libcst-1.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:08a8c7d9922ca6eed24e2c13a3c552b3c186af8fc78e5d4820b58487d780ec19", size = 2175415, upload-time = "2025-06-13T20:56:01.157Z" }, - { url = "https://files.pythonhosted.org/packages/2e/fe/4d13437f453f92687246aa7c5138e102ee5186fe96609ee4c598bb9f9ecb/libcst-1.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bba7c2b5063e8ada5a5477f9fa0c01710645426b5a8628ec50d558542a0a292e", size = 2063719, upload-time = "2025-06-13T20:56:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/94/59/758ae142c6607f275269021362b731e0f22ff5c9aa7cc67b0ed3a6bc930f/libcst-1.8.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:d97c9fe13aacfbefded6861f5200dcb8e837da7391a9bdeb44ccb133705990af", size = 2380624, upload-time = "2025-06-13T20:56:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c5/31d214a0bcb3523243a9b5643b597ff653d6ec9e1f3326cfcc16bcbf185d/libcst-1.8.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d2194ae959630aae4176a4b75bd320b3274c20bef2a5ca6b8d6fc96d3c608edf", size = 2208801, upload-time = "2025-06-13T20:56:06.983Z" }, - { url = 
"https://files.pythonhosted.org/packages/70/16/a53f852322b266c63b492836a5c4968f192ee70fb52795a79feb4924e9ed/libcst-1.8.2-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0be639f5b2e1999a4b4a82a0f4633969f97336f052d0c131627983589af52f56", size = 2179557, upload-time = "2025-06-13T20:56:09.09Z" }, - { url = "https://files.pythonhosted.org/packages/fa/49/12a5664c73107187ba3af14869d3878fca1fd4c37f6fbb9adb943cb7a791/libcst-1.8.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6753e50904e05c27915933da41518ecd7a8ca4dd3602112ba44920c6e353a455", size = 2302499, upload-time = "2025-06-13T20:56:10.751Z" }, - { url = "https://files.pythonhosted.org/packages/e9/46/2d62552a9346a040c045d6619b645d59bb707a586318121f099abd0cd5c4/libcst-1.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:706d07106af91c343150be86caeae1ea3851b74aa0730fcbbf8cd089e817f818", size = 2271070, upload-time = "2025-06-13T20:56:12.445Z" }, - { url = "https://files.pythonhosted.org/packages/af/67/b625fd6ae22575255aade0a24f45e1d430b7e7279729c9c51d4faac982d2/libcst-1.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd4310ea8ddc49cc8872e083737cf806299b17f93159a1f354d59aa08993e876", size = 2380767, upload-time = "2025-06-13T20:56:13.995Z" }, - { url = "https://files.pythonhosted.org/packages/e6/84/fb88f2ffdb045ff7323a6c05dd3d243a9eb3cb3517a6269dee43fbfb9990/libcst-1.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:51bbafdd847529e8a16d1965814ed17831af61452ee31943c414cb23451de926", size = 2083403, upload-time = "2025-06-13T20:56:15.959Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8f/da755d6d517eb8ec9664afae967b00a9b8dd567bbbb350e261359c1b47fc/libcst-1.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:4f14f5045766646ed9e8826b959c6d07194788babed1e0ba08c94ea4f39517e3", size = 1974355, upload-time = "2025-06-13T20:56:18.064Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/2c/68d992835e8630c1b94cdcb246ea7eecad790a955037ca3f19b6c01e8215/liger_kernel-0.6.2-py3-none-any.whl", hash = "sha256:303b9bbf5c10f9289c3139afb41e4d989e8c809516624a106b89b064163d971d", size = 192815, upload-time = "2025-08-22T00:15:27.04Z" }, ] [[package]] -name = "lightning" -version = "2.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fsspec", extra = ["http"] }, - { name = "lightning-utilities" }, - { name = "packaging" }, - { name = "pytorch-lightning" }, - { name = "pyyaml" }, - { name = "torch" }, - { name = "torchmetrics" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/42/3c/6a930ac7c64fb896adbe560a9141570732d9ca890a11e6d158edd5aece76/lightning-2.5.2.tar.gz", hash = "sha256:9550df613cfb22358ebf77b4a8ad45f3767cd7d26ba2d52b7f036bd3cdd701c4", size = 633391, upload-time = "2025-06-20T15:58:22.065Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/71/a9/5d39280e55dc5df9e98be074029f6b48f86fe3db4929cb9ada6401234b47/lightning-2.5.2-py3-none-any.whl", hash = "sha256:7e7f23245e214c8ec14d5d8119d3856c25cfe96f9856296fd5df4e29c2ff88a7", size = 821145, upload-time = "2025-06-20T15:58:18.609Z" }, -] - -[[package]] -name = "lightning-utilities" -version = "0.14.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "setuptools" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0f/bb/63a6a8c9e7a96b6ba92647fa5b1595c2dbee29f8178705adb4704d82ecba/lightning_utilities-0.14.3.tar.gz", hash = "sha256:37e2f83f273890052955a44054382c211a303012ee577619efbaa5df9e65e9f5", size = 30346, upload-time = "2025-04-03T15:59:56.928Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/c1/31b3184cba7b257a4a3b5ca5b88b9204ccb7aa02fe3c992280899293ed54/lightning_utilities-0.14.3-py3-none-any.whl", hash = 
"sha256:4ab9066aa36cd7b93a05713808901909e96cc3f187ea6fd3052b2fd91313b468", size = 28894, upload-time = "2025-04-03T15:59:55.658Z" }, -] - -[[package]] -name = "llguidance" -version = "0.7.30" +name = "llguidance" +version = "0.7.30" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/bf/38/d1ef3ae08d8d857e5e0690c5b1e07bf7eb4a1cae5881d87215826dc6cadb/llguidance-0.7.30.tar.gz", hash = "sha256:e93bf75f2b6e48afb86a5cee23038746975e1654672bf5ba0ae75f7d4d4a2248", size = 1055528, upload-time = "2025-06-23T00:23:49.247Z" } wheels = [ @@ -1942,7 +2373,7 @@ wheels = [ [[package]] name = "lm-format-enforcer" -version = "0.10.11" +version = "0.11.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "interegular" }, @@ -1950,51 +2381,49 @@ dependencies = [ { name = "pydantic" }, { name = "pyyaml" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5b/cc/8a5bf6706385c89474161081d2eeec4dd9cef12dc29cca6acc872685ceb6/lm_format_enforcer-0.10.11.tar.gz", hash = "sha256:8ab371924e166a1df68f243aca73a8a647bea5909f37edd6a53a694e7e7c3274", size = 39390, upload-time = "2025-02-26T22:18:45.338Z" } +sdist = { url = "https://files.pythonhosted.org/packages/84/d5/41cd417ba7dfdbbcfe46cebf81fb3dfd7c591b89897560ad05bb410a465d/lm_format_enforcer-0.11.3.tar.gz", hash = "sha256:e68081c108719cce284a9bcc889709b26ffb085a1945b5eba3a12cfa96d528da", size = 40258, upload-time = "2025-08-24T19:37:47.527Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/cb/bf172960241842e953b3354247f792aae2fc5221552a0741a1c98f35b6f7/lm_format_enforcer-0.10.11-py3-none-any.whl", hash = "sha256:563e0dbc930a6d50fb687951506c5de098c6e962601be0ce723f3b7d0b916a1b", size = 44229, upload-time = "2025-02-26T22:18:42.543Z" }, + { url = "https://files.pythonhosted.org/packages/a0/ef/11292bb0b85cf4c93447cab5a29f64576ed14d3ab4280e35ddd23486594a/lm_format_enforcer-0.11.3-py3-none-any.whl", hash = 
"sha256:cf586350875def1ae7a8fba84fcbbfc8371424b6c9d05c1fcba70aa233fbf06f", size = 45418, upload-time = "2025-08-24T19:37:46.325Z" }, ] [[package]] name = "lxml" -version = "5.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/3d/14e82fc7c8fb1b7761f7e748fd47e2ec8276d137b6acfe5a4bb73853e08f/lxml-5.4.0.tar.gz", hash = "sha256:d12832e1dbea4be280b22fd0ea7c9b87f0d8fc51ba06e92dc62d52f804f78ebd", size = 3679479, upload-time = "2025-04-23T01:50:29.322Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/4c/d101ace719ca6a4ec043eb516fcfcb1b396a9fccc4fcd9ef593df34ba0d5/lxml-5.4.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b5aff6f3e818e6bdbbb38e5967520f174b18f539c2b9de867b1e7fde6f8d95a4", size = 8127392, upload-time = "2025-04-23T01:46:04.09Z" }, - { url = "https://files.pythonhosted.org/packages/11/84/beddae0cec4dd9ddf46abf156f0af451c13019a0fa25d7445b655ba5ccb7/lxml-5.4.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:942a5d73f739ad7c452bf739a62a0f83e2578afd6b8e5406308731f4ce78b16d", size = 4415103, upload-time = "2025-04-23T01:46:07.227Z" }, - { url = "https://files.pythonhosted.org/packages/d0/25/d0d93a4e763f0462cccd2b8a665bf1e4343dd788c76dcfefa289d46a38a9/lxml-5.4.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:460508a4b07364d6abf53acaa0a90b6d370fafde5693ef37602566613a9b0779", size = 5024224, upload-time = "2025-04-23T01:46:10.237Z" }, - { url = "https://files.pythonhosted.org/packages/31/ce/1df18fb8f7946e7f3388af378b1f34fcf253b94b9feedb2cec5969da8012/lxml-5.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:529024ab3a505fed78fe3cc5ddc079464e709f6c892733e3f5842007cec8ac6e", size = 4769913, upload-time = "2025-04-23T01:46:12.757Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/62/f4a6c60ae7c40d43657f552f3045df05118636be1165b906d3423790447f/lxml-5.4.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ca56ebc2c474e8f3d5761debfd9283b8b18c76c4fc0967b74aeafba1f5647f9", size = 5290441, upload-time = "2025-04-23T01:46:16.037Z" }, - { url = "https://files.pythonhosted.org/packages/9e/aa/04f00009e1e3a77838c7fc948f161b5d2d5de1136b2b81c712a263829ea4/lxml-5.4.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a81e1196f0a5b4167a8dafe3a66aa67c4addac1b22dc47947abd5d5c7a3f24b5", size = 4820165, upload-time = "2025-04-23T01:46:19.137Z" }, - { url = "https://files.pythonhosted.org/packages/c9/1f/e0b2f61fa2404bf0f1fdf1898377e5bd1b74cc9b2cf2c6ba8509b8f27990/lxml-5.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00b8686694423ddae324cf614e1b9659c2edb754de617703c3d29ff568448df5", size = 4932580, upload-time = "2025-04-23T01:46:21.963Z" }, - { url = "https://files.pythonhosted.org/packages/24/a2/8263f351b4ffe0ed3e32ea7b7830f845c795349034f912f490180d88a877/lxml-5.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c5681160758d3f6ac5b4fea370495c48aac0989d6a0f01bb9a72ad8ef5ab75c4", size = 4759493, upload-time = "2025-04-23T01:46:24.316Z" }, - { url = "https://files.pythonhosted.org/packages/05/00/41db052f279995c0e35c79d0f0fc9f8122d5b5e9630139c592a0b58c71b4/lxml-5.4.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:2dc191e60425ad70e75a68c9fd90ab284df64d9cd410ba8d2b641c0c45bc006e", size = 5324679, upload-time = "2025-04-23T01:46:27.097Z" }, - { url = "https://files.pythonhosted.org/packages/1d/be/ee99e6314cdef4587617d3b3b745f9356d9b7dd12a9663c5f3b5734b64ba/lxml-5.4.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:67f779374c6b9753ae0a0195a892a1c234ce8416e4448fe1e9f34746482070a7", size = 4890691, upload-time = "2025-04-23T01:46:30.009Z" }, - { url = 
"https://files.pythonhosted.org/packages/ad/36/239820114bf1d71f38f12208b9c58dec033cbcf80101cde006b9bde5cffd/lxml-5.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:79d5bfa9c1b455336f52343130b2067164040604e41f6dc4d8313867ed540079", size = 4955075, upload-time = "2025-04-23T01:46:32.33Z" }, - { url = "https://files.pythonhosted.org/packages/d4/e1/1b795cc0b174efc9e13dbd078a9ff79a58728a033142bc6d70a1ee8fc34d/lxml-5.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3d3c30ba1c9b48c68489dc1829a6eede9873f52edca1dda900066542528d6b20", size = 4838680, upload-time = "2025-04-23T01:46:34.852Z" }, - { url = "https://files.pythonhosted.org/packages/72/48/3c198455ca108cec5ae3662ae8acd7fd99476812fd712bb17f1b39a0b589/lxml-5.4.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:1af80c6316ae68aded77e91cd9d80648f7dd40406cef73df841aa3c36f6907c8", size = 5391253, upload-time = "2025-04-23T01:46:37.608Z" }, - { url = "https://files.pythonhosted.org/packages/d6/10/5bf51858971c51ec96cfc13e800a9951f3fd501686f4c18d7d84fe2d6352/lxml-5.4.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4d885698f5019abe0de3d352caf9466d5de2baded00a06ef3f1216c1a58ae78f", size = 5261651, upload-time = "2025-04-23T01:46:40.183Z" }, - { url = "https://files.pythonhosted.org/packages/2b/11/06710dd809205377da380546f91d2ac94bad9ff735a72b64ec029f706c85/lxml-5.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea53d51859b6c64e7c51d522c03cc2c48b9b5d6172126854cc7f01aa11f52bc", size = 5024315, upload-time = "2025-04-23T01:46:43.333Z" }, - { url = "https://files.pythonhosted.org/packages/f5/b0/15b6217834b5e3a59ebf7f53125e08e318030e8cc0d7310355e6edac98ef/lxml-5.4.0-cp312-cp312-win32.whl", hash = "sha256:d90b729fd2732df28130c064aac9bb8aff14ba20baa4aee7bd0795ff1187545f", size = 3486149, upload-time = "2025-04-23T01:46:45.684Z" }, - { url = "https://files.pythonhosted.org/packages/91/1e/05ddcb57ad2f3069101611bd5f5084157d90861a2ef460bf42f45cced944/lxml-5.4.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:1dc4ca99e89c335a7ed47d38964abcb36c5910790f9bd106f2a8fa2ee0b909d2", size = 3817095, upload-time = "2025-04-23T01:46:48.521Z" }, - { url = "https://files.pythonhosted.org/packages/87/cb/2ba1e9dd953415f58548506fa5549a7f373ae55e80c61c9041b7fd09a38a/lxml-5.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:773e27b62920199c6197130632c18fb7ead3257fce1ffb7d286912e56ddb79e0", size = 8110086, upload-time = "2025-04-23T01:46:52.218Z" }, - { url = "https://files.pythonhosted.org/packages/b5/3e/6602a4dca3ae344e8609914d6ab22e52ce42e3e1638c10967568c5c1450d/lxml-5.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ce9c671845de9699904b1e9df95acfe8dfc183f2310f163cdaa91a3535af95de", size = 4404613, upload-time = "2025-04-23T01:46:55.281Z" }, - { url = "https://files.pythonhosted.org/packages/4c/72/bf00988477d3bb452bef9436e45aeea82bb40cdfb4684b83c967c53909c7/lxml-5.4.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9454b8d8200ec99a224df8854786262b1bd6461f4280064c807303c642c05e76", size = 5012008, upload-time = "2025-04-23T01:46:57.817Z" }, - { url = "https://files.pythonhosted.org/packages/92/1f/93e42d93e9e7a44b2d3354c462cd784dbaaf350f7976b5d7c3f85d68d1b1/lxml-5.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cccd007d5c95279e529c146d095f1d39ac05139de26c098166c4beb9374b0f4d", size = 4760915, upload-time = "2025-04-23T01:47:00.745Z" }, - { url = "https://files.pythonhosted.org/packages/45/0b/363009390d0b461cf9976a499e83b68f792e4c32ecef092f3f9ef9c4ba54/lxml-5.4.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0fce1294a0497edb034cb416ad3e77ecc89b313cff7adbee5334e4dc0d11f422", size = 5283890, upload-time = "2025-04-23T01:47:04.702Z" }, - { url = "https://files.pythonhosted.org/packages/19/dc/6056c332f9378ab476c88e301e6549a0454dbee8f0ae16847414f0eccb74/lxml-5.4.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:24974f774f3a78ac12b95e3a20ef0931795ff04dbb16db81a90c37f589819551", size = 4812644, upload-time = "2025-04-23T01:47:07.833Z" }, - { url = "https://files.pythonhosted.org/packages/ee/8a/f8c66bbb23ecb9048a46a5ef9b495fd23f7543df642dabeebcb2eeb66592/lxml-5.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:497cab4d8254c2a90bf988f162ace2ddbfdd806fce3bda3f581b9d24c852e03c", size = 4921817, upload-time = "2025-04-23T01:47:10.317Z" }, - { url = "https://files.pythonhosted.org/packages/04/57/2e537083c3f381f83d05d9b176f0d838a9e8961f7ed8ddce3f0217179ce3/lxml-5.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e794f698ae4c5084414efea0f5cc9f4ac562ec02d66e1484ff822ef97c2cadff", size = 4753916, upload-time = "2025-04-23T01:47:12.823Z" }, - { url = "https://files.pythonhosted.org/packages/d8/80/ea8c4072109a350848f1157ce83ccd9439601274035cd045ac31f47f3417/lxml-5.4.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:2c62891b1ea3094bb12097822b3d44b93fc6c325f2043c4d2736a8ff09e65f60", size = 5289274, upload-time = "2025-04-23T01:47:15.916Z" }, - { url = "https://files.pythonhosted.org/packages/b3/47/c4be287c48cdc304483457878a3f22999098b9a95f455e3c4bda7ec7fc72/lxml-5.4.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:142accb3e4d1edae4b392bd165a9abdee8a3c432a2cca193df995bc3886249c8", size = 4874757, upload-time = "2025-04-23T01:47:19.793Z" }, - { url = "https://files.pythonhosted.org/packages/2f/04/6ef935dc74e729932e39478e44d8cfe6a83550552eaa072b7c05f6f22488/lxml-5.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1a42b3a19346e5601d1b8296ff6ef3d76038058f311902edd574461e9c036982", size = 4947028, upload-time = "2025-04-23T01:47:22.401Z" }, - { url = "https://files.pythonhosted.org/packages/cb/f9/c33fc8daa373ef8a7daddb53175289024512b6619bc9de36d77dca3df44b/lxml-5.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4291d3c409a17febf817259cb37bc62cb7eb398bcc95c1356947e2871911ae61", size = 4834487, upload-time = 
"2025-04-23T01:47:25.513Z" }, - { url = "https://files.pythonhosted.org/packages/8d/30/fc92bb595bcb878311e01b418b57d13900f84c2b94f6eca9e5073ea756e6/lxml-5.4.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4f5322cf38fe0e21c2d73901abf68e6329dc02a4994e483adbcf92b568a09a54", size = 5381688, upload-time = "2025-04-23T01:47:28.454Z" }, - { url = "https://files.pythonhosted.org/packages/43/d1/3ba7bd978ce28bba8e3da2c2e9d5ae3f8f521ad3f0ca6ea4788d086ba00d/lxml-5.4.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0be91891bdb06ebe65122aa6bf3fc94489960cf7e03033c6f83a90863b23c58b", size = 5242043, upload-time = "2025-04-23T01:47:31.208Z" }, - { url = "https://files.pythonhosted.org/packages/ee/cd/95fa2201041a610c4d08ddaf31d43b98ecc4b1d74b1e7245b1abdab443cb/lxml-5.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:15a665ad90054a3d4f397bc40f73948d48e36e4c09f9bcffc7d90c87410e478a", size = 5021569, upload-time = "2025-04-23T01:47:33.805Z" }, - { url = "https://files.pythonhosted.org/packages/2d/a6/31da006fead660b9512d08d23d31e93ad3477dd47cc42e3285f143443176/lxml-5.4.0-cp313-cp313-win32.whl", hash = "sha256:d5663bc1b471c79f5c833cffbc9b87d7bf13f87e055a5c86c363ccd2348d7e82", size = 3485270, upload-time = "2025-04-23T01:47:36.133Z" }, - { url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606, upload-time = "2025-04-23T01:47:39.028Z" }, +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/ed/60eb6fa2923602fba988d9ca7c5cdbd7cf25faa795162ed538b527a35411/lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72", size = 4096938, upload-time = "2025-06-26T16:28:19.373Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/89/c3/d01d735c298d7e0ddcedf6f028bf556577e5ab4f4da45175ecd909c79378/lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108", size = 8429515, upload-time = "2025-06-26T16:26:06.776Z" }, + { url = "https://files.pythonhosted.org/packages/06/37/0e3eae3043d366b73da55a86274a590bae76dc45aa004b7042e6f97803b1/lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be", size = 4601387, upload-time = "2025-06-26T16:26:09.511Z" }, + { url = "https://files.pythonhosted.org/packages/a3/28/e1a9a881e6d6e29dda13d633885d13acb0058f65e95da67841c8dd02b4a8/lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab", size = 5228928, upload-time = "2025-06-26T16:26:12.337Z" }, + { url = "https://files.pythonhosted.org/packages/9a/55/2cb24ea48aa30c99f805921c1c7860c1f45c0e811e44ee4e6a155668de06/lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563", size = 4952289, upload-time = "2025-06-28T18:47:25.602Z" }, + { url = "https://files.pythonhosted.org/packages/31/c0/b25d9528df296b9a3306ba21ff982fc5b698c45ab78b94d18c2d6ae71fd9/lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7", size = 5111310, upload-time = "2025-06-28T18:47:28.136Z" }, + { url = "https://files.pythonhosted.org/packages/e9/af/681a8b3e4f668bea6e6514cbcb297beb6de2b641e70f09d3d78655f4f44c/lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7", size = 5025457, upload-time = "2025-06-26T16:26:15.068Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/b6/3a7971aa05b7be7dfebc7ab57262ec527775c2c3c5b2f43675cac0458cad/lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991", size = 5657016, upload-time = "2025-07-03T19:19:06.008Z" }, + { url = "https://files.pythonhosted.org/packages/69/f8/693b1a10a891197143c0673fcce5b75fc69132afa81a36e4568c12c8faba/lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da", size = 5257565, upload-time = "2025-06-26T16:26:17.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/96/e08ff98f2c6426c98c8964513c5dab8d6eb81dadcd0af6f0c538ada78d33/lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e", size = 4713390, upload-time = "2025-06-26T16:26:20.292Z" }, + { url = "https://files.pythonhosted.org/packages/a8/83/6184aba6cc94d7413959f6f8f54807dc318fdcd4985c347fe3ea6937f772/lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741", size = 5066103, upload-time = "2025-06-26T16:26:22.765Z" }, + { url = "https://files.pythonhosted.org/packages/ee/01/8bf1f4035852d0ff2e36a4d9aacdbcc57e93a6cd35a54e05fa984cdf73ab/lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3", size = 4791428, upload-time = "2025-06-26T16:26:26.461Z" }, + { url = "https://files.pythonhosted.org/packages/29/31/c0267d03b16954a85ed6b065116b621d37f559553d9339c7dcc4943a76f1/lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16", size = 5678523, upload-time = "2025-07-03T19:19:09.837Z" }, + { url = 
"https://files.pythonhosted.org/packages/5c/f7/5495829a864bc5f8b0798d2b52a807c89966523140f3d6fa3a58ab6720ea/lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0", size = 5281290, upload-time = "2025-06-26T16:26:29.406Z" }, + { url = "https://files.pythonhosted.org/packages/79/56/6b8edb79d9ed294ccc4e881f4db1023af56ba451909b9ce79f2a2cd7c532/lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a", size = 3613495, upload-time = "2025-06-26T16:26:31.588Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1e/cc32034b40ad6af80b6fd9b66301fc0f180f300002e5c3eb5a6110a93317/lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3", size = 4014711, upload-time = "2025-06-26T16:26:33.723Z" }, + { url = "https://files.pythonhosted.org/packages/55/10/dc8e5290ae4c94bdc1a4c55865be7e1f31dfd857a88b21cbba68b5fea61b/lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb", size = 3674431, upload-time = "2025-06-26T16:26:35.959Z" }, + { url = "https://files.pythonhosted.org/packages/79/21/6e7c060822a3c954ff085e5e1b94b4a25757c06529eac91e550f3f5cd8b8/lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da", size = 8414372, upload-time = "2025-06-26T16:26:39.079Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f6/051b1607a459db670fc3a244fa4f06f101a8adf86cda263d1a56b3a4f9d5/lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7", size = 4593940, upload-time = "2025-06-26T16:26:41.891Z" }, + { url = 
"https://files.pythonhosted.org/packages/8e/74/dd595d92a40bda3c687d70d4487b2c7eff93fd63b568acd64fedd2ba00fe/lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3", size = 5214329, upload-time = "2025-06-26T16:26:44.669Z" }, + { url = "https://files.pythonhosted.org/packages/52/46/3572761efc1bd45fcafb44a63b3b0feeb5b3f0066886821e94b0254f9253/lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81", size = 4947559, upload-time = "2025-06-28T18:47:31.091Z" }, + { url = "https://files.pythonhosted.org/packages/94/8a/5e40de920e67c4f2eef9151097deb9b52d86c95762d8ee238134aff2125d/lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1", size = 5102143, upload-time = "2025-06-28T18:47:33.612Z" }, + { url = "https://files.pythonhosted.org/packages/7c/4b/20555bdd75d57945bdabfbc45fdb1a36a1a0ff9eae4653e951b2b79c9209/lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24", size = 5021931, upload-time = "2025-06-26T16:26:47.503Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/cf03b412f3763d4ca23b25e70c96a74cfece64cec3addf1c4ec639586b13/lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a", size = 5645469, upload-time = "2025-07-03T19:19:13.32Z" }, + { url = "https://files.pythonhosted.org/packages/d4/dd/39c8507c16db6031f8c1ddf70ed95dbb0a6d466a40002a3522c128aba472/lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29", size = 5247467, upload-time = 
"2025-06-26T16:26:49.998Z" }, + { url = "https://files.pythonhosted.org/packages/4d/56/732d49def0631ad633844cfb2664563c830173a98d5efd9b172e89a4800d/lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4", size = 4720601, upload-time = "2025-06-26T16:26:52.564Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7f/6b956fab95fa73462bca25d1ea7fc8274ddf68fb8e60b78d56c03b65278e/lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca", size = 5060227, upload-time = "2025-06-26T16:26:55.054Z" }, + { url = "https://files.pythonhosted.org/packages/97/06/e851ac2924447e8b15a294855caf3d543424364a143c001014d22c8ca94c/lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf", size = 4790637, upload-time = "2025-06-26T16:26:57.384Z" }, + { url = "https://files.pythonhosted.org/packages/06/d4/fd216f3cd6625022c25b336c7570d11f4a43adbaf0a56106d3d496f727a7/lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f", size = 5662049, upload-time = "2025-07-03T19:19:16.409Z" }, + { url = "https://files.pythonhosted.org/packages/52/03/0e764ce00b95e008d76b99d432f1807f3574fb2945b496a17807a1645dbd/lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef", size = 5272430, upload-time = "2025-06-26T16:27:00.031Z" }, + { url = "https://files.pythonhosted.org/packages/5f/01/d48cc141bc47bc1644d20fe97bbd5e8afb30415ec94f146f2f76d0d9d098/lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181", size = 3612896, upload-time = "2025-06-26T16:27:04.251Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/87/6456b9541d186ee7d4cb53bf1b9a0d7f3b1068532676940fdd594ac90865/lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e", size = 4013132, upload-time = "2025-06-26T16:27:06.415Z" }, + { url = "https://files.pythonhosted.org/packages/b7/42/85b3aa8f06ca0d24962f8100f001828e1f1f1a38c954c16e71154ed7d53a/lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03", size = 3672642, upload-time = "2025-06-26T16:27:09.888Z" }, ] [[package]] @@ -2009,6 +2438,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, ] +[[package]] +name = "mamba-ssm" +version = "2.2.4" +source = { git = "https://github.com/state-spaces/mamba.git?rev=2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4#2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" } +dependencies = [ + { name = "causal-conv1d" }, + { name = "ninja" }, + { name = "packaging" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] + [[package]] name = "markdown" version = "3.8.2" @@ -2070,19 +2511,19 @@ wheels = [ [[package]] name = "math-verify" -version = "0.7.0" +version = "0.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "latex2sympy2-extended" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/59/7daf53e52593af0f7f455ea97c91dc34682f1c2e72f493490259a640836c/math_verify-0.7.0.tar.gz", hash = "sha256:b7459c7b9f1c503b7df32566a95f1e6a33a6cad8cbb759eaa6f100d401112024", 
size = 57582, upload-time = "2025-02-27T16:21:04.151Z" } +sdist = { url = "https://files.pythonhosted.org/packages/35/b5/b1db6fa6b6c28ebbe1889ee11a4703a72a2ca7750ec415f4559c758cf01a/math_verify-0.8.0.tar.gz", hash = "sha256:3295e0adb94bfe553ff6e3189c44f1916a85aa24ab5d1900f2086a706e28f7c4", size = 60191, upload-time = "2025-07-02T15:52:07.209Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/c5/e5de2e29f9db4c92956ce167e1aa534b1e1770ddc0b17600257cf5f8bd50/math_verify-0.7.0-py3-none-any.whl", hash = "sha256:bbdd491b511f6ceef27c5e08543affa1735807b6e3abb6f005bf493dc0eb485b", size = 28677, upload-time = "2025-02-27T16:20:56.612Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9f/59979f699b5c97334298f1295bc9fcdc9904d98d2276479bffff863d23b1/math_verify-0.8.0-py3-none-any.whl", hash = "sha256:31ca651296d817a9bb3fd58ca1fd0d192dcea709b1e5ecf2d0a4514c16f89087", size = 29994, upload-time = "2025-07-02T15:52:05.023Z" }, ] [[package]] name = "matplotlib" -version = "3.10.3" +version = "3.10.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "contourpy" }, @@ -2095,38 +2536,55 @@ dependencies = [ { name = "pyparsing" }, { name = "python-dateutil" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/26/91/d49359a21893183ed2a5b6c76bec40e0b1dcbf8ca148f864d134897cfc75/matplotlib-3.10.3.tar.gz", hash = "sha256:2f82d2c5bb7ae93aaaa4cd42aca65d76ce6376f83304fa3a630b569aca274df0", size = 34799811, upload-time = "2025-05-08T19:10:54.39Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/43/6b80eb47d1071f234ef0c96ca370c2ca621f91c12045f1401b5c9b28a639/matplotlib-3.10.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ab1affc11d1f495ab9e6362b8174a25afc19c081ba5b0775ef00533a4236eea", size = 8179689, upload-time = "2025-05-08T19:10:07.602Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/70/d61a591958325c357204870b5e7b164f93f2a8cca1dc6ce940f563909a13/matplotlib-3.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2a818d8bdcafa7ed2eed74487fdb071c09c1ae24152d403952adad11fa3c65b4", size = 8050466, upload-time = "2025-05-08T19:10:09.383Z" }, - { url = "https://files.pythonhosted.org/packages/e7/75/70c9d2306203148cc7902a961240c5927dd8728afedf35e6a77e105a2985/matplotlib-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:748ebc3470c253e770b17d8b0557f0aa85cf8c63fd52f1a61af5b27ec0b7ffee", size = 8456252, upload-time = "2025-05-08T19:10:11.958Z" }, - { url = "https://files.pythonhosted.org/packages/c4/91/ba0ae1ff4b3f30972ad01cd4a8029e70a0ec3b8ea5be04764b128b66f763/matplotlib-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ed70453fd99733293ace1aec568255bc51c6361cb0da94fa5ebf0649fdb2150a", size = 8601321, upload-time = "2025-05-08T19:10:14.47Z" }, - { url = "https://files.pythonhosted.org/packages/d2/88/d636041eb54a84b889e11872d91f7cbf036b3b0e194a70fa064eb8b04f7a/matplotlib-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dbed9917b44070e55640bd13419de83b4c918e52d97561544814ba463811cbc7", size = 9406972, upload-time = "2025-05-08T19:10:16.569Z" }, - { url = "https://files.pythonhosted.org/packages/b1/79/0d1c165eac44405a86478082e225fce87874f7198300bbebc55faaf6d28d/matplotlib-3.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:cf37d8c6ef1a48829443e8ba5227b44236d7fcaf7647caa3178a4ff9f7a5be05", size = 8067954, upload-time = "2025-05-08T19:10:18.663Z" }, - { url = "https://files.pythonhosted.org/packages/3b/c1/23cfb566a74c696a3b338d8955c549900d18fe2b898b6e94d682ca21e7c2/matplotlib-3.10.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9f2efccc8dcf2b86fc4ee849eea5dcaecedd0773b30f47980dc0cbeabf26ec84", size = 8180318, upload-time = "2025-05-08T19:10:20.426Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/0c/02f1c3b66b30da9ee343c343acbb6251bef5b01d34fad732446eaadcd108/matplotlib-3.10.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ddbba06a6c126e3301c3d272a99dcbe7f6c24c14024e80307ff03791a5f294e", size = 8051132, upload-time = "2025-05-08T19:10:22.569Z" }, - { url = "https://files.pythonhosted.org/packages/b4/ab/8db1a5ac9b3a7352fb914133001dae889f9fcecb3146541be46bed41339c/matplotlib-3.10.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:748302b33ae9326995b238f606e9ed840bf5886ebafcb233775d946aa8107a15", size = 8457633, upload-time = "2025-05-08T19:10:24.749Z" }, - { url = "https://files.pythonhosted.org/packages/f5/64/41c4367bcaecbc03ef0d2a3ecee58a7065d0a36ae1aa817fe573a2da66d4/matplotlib-3.10.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a80fcccbef63302c0efd78042ea3c2436104c5b1a4d3ae20f864593696364ac7", size = 8601031, upload-time = "2025-05-08T19:10:27.03Z" }, - { url = "https://files.pythonhosted.org/packages/12/6f/6cc79e9e5ab89d13ed64da28898e40fe5b105a9ab9c98f83abd24e46d7d7/matplotlib-3.10.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:55e46cbfe1f8586adb34f7587c3e4f7dedc59d5226719faf6cb54fc24f2fd52d", size = 9406988, upload-time = "2025-05-08T19:10:29.056Z" }, - { url = "https://files.pythonhosted.org/packages/b1/0f/eed564407bd4d935ffabf561ed31099ed609e19287409a27b6d336848653/matplotlib-3.10.3-cp313-cp313-win_amd64.whl", hash = "sha256:151d89cb8d33cb23345cd12490c76fd5d18a56581a16d950b48c6ff19bb2ab93", size = 8068034, upload-time = "2025-05-08T19:10:31.221Z" }, - { url = "https://files.pythonhosted.org/packages/3e/e5/2f14791ff69b12b09e9975e1d116d9578ac684460860ce542c2588cb7a1c/matplotlib-3.10.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:c26dd9834e74d164d06433dc7be5d75a1e9890b926b3e57e74fa446e1a62c3e2", size = 8218223, upload-time = "2025-05-08T19:10:33.114Z" }, - { url = 
"https://files.pythonhosted.org/packages/5c/08/30a94afd828b6e02d0a52cae4a29d6e9ccfcf4c8b56cc28b021d3588873e/matplotlib-3.10.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:24853dad5b8c84c8c2390fc31ce4858b6df504156893292ce8092d190ef8151d", size = 8094985, upload-time = "2025-05-08T19:10:35.337Z" }, - { url = "https://files.pythonhosted.org/packages/89/44/f3bc6b53066c889d7a1a3ea8094c13af6a667c5ca6220ec60ecceec2dabe/matplotlib-3.10.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68f7878214d369d7d4215e2a9075fef743be38fa401d32e6020bab2dfabaa566", size = 8483109, upload-time = "2025-05-08T19:10:37.611Z" }, - { url = "https://files.pythonhosted.org/packages/ba/c7/473bc559beec08ebee9f86ca77a844b65747e1a6c2691e8c92e40b9f42a8/matplotlib-3.10.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6929fc618cb6db9cb75086f73b3219bbb25920cb24cee2ea7a12b04971a4158", size = 8618082, upload-time = "2025-05-08T19:10:39.892Z" }, - { url = "https://files.pythonhosted.org/packages/d8/e9/6ce8edd264c8819e37bbed8172e0ccdc7107fe86999b76ab5752276357a4/matplotlib-3.10.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6c7818292a5cc372a2dc4c795e5c356942eb8350b98ef913f7fda51fe175ac5d", size = 9413699, upload-time = "2025-05-08T19:10:42.376Z" }, - { url = "https://files.pythonhosted.org/packages/1b/92/9a45c91089c3cf690b5badd4be81e392ff086ccca8a1d4e3a08463d8a966/matplotlib-3.10.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4f23ffe95c5667ef8a2b56eea9b53db7f43910fa4a2d5472ae0f72b64deab4d5", size = 8139044, upload-time = "2025-05-08T19:10:44.551Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/43/91/f2939bb60b7ebf12478b030e0d7f340247390f402b3b189616aad790c366/matplotlib-3.10.5.tar.gz", hash = "sha256:352ed6ccfb7998a00881692f38b4ca083c691d3e275b4145423704c34c909076", size = 34804044, upload-time = "2025-07-31T18:09:33.805Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/66/1e/c6f6bcd882d589410b475ca1fc22e34e34c82adff519caf18f3e6dd9d682/matplotlib-3.10.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:00b6feadc28a08bd3c65b2894f56cf3c94fc8f7adcbc6ab4516ae1e8ed8f62e2", size = 8253056, upload-time = "2025-07-31T18:08:05.385Z" }, + { url = "https://files.pythonhosted.org/packages/53/e6/d6f7d1b59413f233793dda14419776f5f443bcccb2dfc84b09f09fe05dbe/matplotlib-3.10.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee98a5c5344dc7f48dc261b6ba5d9900c008fc12beb3fa6ebda81273602cc389", size = 8110131, upload-time = "2025-07-31T18:08:07.293Z" }, + { url = "https://files.pythonhosted.org/packages/66/2b/bed8a45e74957549197a2ac2e1259671cd80b55ed9e1fe2b5c94d88a9202/matplotlib-3.10.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a17e57e33de901d221a07af32c08870ed4528db0b6059dce7d7e65c1122d4bea", size = 8669603, upload-time = "2025-07-31T18:08:09.064Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a7/315e9435b10d057f5e52dfc603cd353167ae28bb1a4e033d41540c0067a4/matplotlib-3.10.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97b9d6443419085950ee4a5b1ee08c363e5c43d7176e55513479e53669e88468", size = 9508127, upload-time = "2025-07-31T18:08:10.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d9/edcbb1f02ca99165365d2768d517898c22c6040187e2ae2ce7294437c413/matplotlib-3.10.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ceefe5d40807d29a66ae916c6a3915d60ef9f028ce1927b84e727be91d884369", size = 9566926, upload-time = "2025-07-31T18:08:13.186Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d9/6dd924ad5616c97b7308e6320cf392c466237a82a2040381163b7500510a/matplotlib-3.10.5-cp312-cp312-win_amd64.whl", hash = "sha256:c04cba0f93d40e45b3c187c6c52c17f24535b27d545f757a2fffebc06c12b98b", size = 8107599, upload-time = "2025-07-31T18:08:15.116Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/f3/522dc319a50f7b0279fbe74f86f7a3506ce414bc23172098e8d2bdf21894/matplotlib-3.10.5-cp312-cp312-win_arm64.whl", hash = "sha256:a41bcb6e2c8e79dc99c5511ae6f7787d2fb52efd3d805fff06d5d4f667db16b2", size = 7978173, upload-time = "2025-07-31T18:08:21.518Z" }, + { url = "https://files.pythonhosted.org/packages/8d/05/4f3c1f396075f108515e45cb8d334aff011a922350e502a7472e24c52d77/matplotlib-3.10.5-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:354204db3f7d5caaa10e5de74549ef6a05a4550fdd1c8f831ab9bca81efd39ed", size = 8253586, upload-time = "2025-07-31T18:08:23.107Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2c/e084415775aac7016c3719fe7006cdb462582c6c99ac142f27303c56e243/matplotlib-3.10.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b072aac0c3ad563a2b3318124756cb6112157017f7431626600ecbe890df57a1", size = 8110715, upload-time = "2025-07-31T18:08:24.675Z" }, + { url = "https://files.pythonhosted.org/packages/52/1b/233e3094b749df16e3e6cd5a44849fd33852e692ad009cf7de00cf58ddf6/matplotlib-3.10.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d52fd5b684d541b5a51fb276b2b97b010c75bee9aa392f96b4a07aeb491e33c7", size = 8669397, upload-time = "2025-07-31T18:08:26.778Z" }, + { url = "https://files.pythonhosted.org/packages/e8/ec/03f9e003a798f907d9f772eed9b7c6a9775d5bd00648b643ebfb88e25414/matplotlib-3.10.5-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee7a09ae2f4676276f5a65bd9f2bd91b4f9fbaedf49f40267ce3f9b448de501f", size = 9508646, upload-time = "2025-07-31T18:08:28.848Z" }, + { url = "https://files.pythonhosted.org/packages/91/e7/c051a7a386680c28487bca27d23b02d84f63e3d2a9b4d2fc478e6a42e37e/matplotlib-3.10.5-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ba6c3c9c067b83481d647af88b4e441d532acdb5ef22178a14935b0b881188f4", size = 9567424, upload-time = "2025-07-31T18:08:30.726Z" }, + { url = 
"https://files.pythonhosted.org/packages/36/c2/24302e93ff431b8f4173ee1dd88976c8d80483cadbc5d3d777cef47b3a1c/matplotlib-3.10.5-cp313-cp313-win_amd64.whl", hash = "sha256:07442d2692c9bd1cceaa4afb4bbe5b57b98a7599de4dabfcca92d3eea70f9ebe", size = 8107809, upload-time = "2025-07-31T18:08:33.928Z" }, + { url = "https://files.pythonhosted.org/packages/0b/33/423ec6a668d375dad825197557ed8fbdb74d62b432c1ed8235465945475f/matplotlib-3.10.5-cp313-cp313-win_arm64.whl", hash = "sha256:48fe6d47380b68a37ccfcc94f009530e84d41f71f5dae7eda7c4a5a84aa0a674", size = 7978078, upload-time = "2025-07-31T18:08:36.764Z" }, + { url = "https://files.pythonhosted.org/packages/51/17/521fc16ec766455c7bb52cc046550cf7652f6765ca8650ff120aa2d197b6/matplotlib-3.10.5-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b80eb8621331449fc519541a7461987f10afa4f9cfd91afcd2276ebe19bd56c", size = 8295590, upload-time = "2025-07-31T18:08:38.521Z" }, + { url = "https://files.pythonhosted.org/packages/f8/12/23c28b2c21114c63999bae129fce7fd34515641c517ae48ce7b7dcd33458/matplotlib-3.10.5-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47a388908e469d6ca2a6015858fa924e0e8a2345a37125948d8e93a91c47933e", size = 8158518, upload-time = "2025-07-31T18:08:40.195Z" }, + { url = "https://files.pythonhosted.org/packages/81/f8/aae4eb25e8e7190759f3cb91cbeaa344128159ac92bb6b409e24f8711f78/matplotlib-3.10.5-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b6b49167d208358983ce26e43aa4196073b4702858670f2eb111f9a10652b4b", size = 8691815, upload-time = "2025-07-31T18:08:42.238Z" }, + { url = "https://files.pythonhosted.org/packages/d0/ba/450c39ebdd486bd33a359fc17365ade46c6a96bf637bbb0df7824de2886c/matplotlib-3.10.5-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8a8da0453a7fd8e3da114234ba70c5ba9ef0e98f190309ddfde0f089accd46ea", size = 9522814, upload-time = "2025-07-31T18:08:44.914Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/11/9c66f6a990e27bb9aa023f7988d2d5809cb98aa39c09cbf20fba75a542ef/matplotlib-3.10.5-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:52c6573dfcb7726a9907b482cd5b92e6b5499b284ffacb04ffbfe06b3e568124", size = 9573917, upload-time = "2025-07-31T18:08:47.038Z" }, + { url = "https://files.pythonhosted.org/packages/b3/69/8b49394de92569419e5e05e82e83df9b749a0ff550d07631ea96ed2eb35a/matplotlib-3.10.5-cp313-cp313t-win_amd64.whl", hash = "sha256:a23193db2e9d64ece69cac0c8231849db7dd77ce59c7b89948cf9d0ce655a3ce", size = 8181034, upload-time = "2025-07-31T18:08:48.943Z" }, + { url = "https://files.pythonhosted.org/packages/47/23/82dc435bb98a2fc5c20dffcac8f0b083935ac28286413ed8835df40d0baa/matplotlib-3.10.5-cp313-cp313t-win_arm64.whl", hash = "sha256:56da3b102cf6da2776fef3e71cd96fcf22103a13594a18ac9a9b31314e0be154", size = 8023337, upload-time = "2025-07-31T18:08:50.791Z" }, + { url = "https://files.pythonhosted.org/packages/ac/e0/26b6cfde31f5383503ee45dcb7e691d45dadf0b3f54639332b59316a97f8/matplotlib-3.10.5-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:96ef8f5a3696f20f55597ffa91c28e2e73088df25c555f8d4754931515512715", size = 8253591, upload-time = "2025-07-31T18:08:53.254Z" }, + { url = "https://files.pythonhosted.org/packages/c1/89/98488c7ef7ea20ea659af7499628c240a608b337af4be2066d644cfd0a0f/matplotlib-3.10.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:77fab633e94b9da60512d4fa0213daeb76d5a7b05156840c4fd0399b4b818837", size = 8112566, upload-time = "2025-07-31T18:08:55.116Z" }, + { url = "https://files.pythonhosted.org/packages/52/67/42294dfedc82aea55e1a767daf3263aacfb5a125f44ba189e685bab41b6f/matplotlib-3.10.5-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:27f52634315e96b1debbfdc5c416592edcd9c4221bc2f520fd39c33db5d9f202", size = 9513281, upload-time = "2025-07-31T18:08:56.885Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/68/f258239e0cf34c2cbc816781c7ab6fca768452e6bf1119aedd2bd4a882a3/matplotlib-3.10.5-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:525f6e28c485c769d1f07935b660c864de41c37fd716bfa64158ea646f7084bb", size = 9780873, upload-time = "2025-07-31T18:08:59.241Z" }, + { url = "https://files.pythonhosted.org/packages/89/64/f4881554006bd12e4558bd66778bdd15d47b00a1f6c6e8b50f6208eda4b3/matplotlib-3.10.5-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1f5f3ec4c191253c5f2b7c07096a142c6a1c024d9f738247bfc8e3f9643fc975", size = 9568954, upload-time = "2025-07-31T18:09:01.244Z" }, + { url = "https://files.pythonhosted.org/packages/06/f8/42779d39c3f757e1f012f2dda3319a89fb602bd2ef98ce8faf0281f4febd/matplotlib-3.10.5-cp314-cp314-win_amd64.whl", hash = "sha256:707f9c292c4cd4716f19ab8a1f93f26598222cd931e0cd98fbbb1c5994bf7667", size = 8237465, upload-time = "2025-07-31T18:09:03.206Z" }, + { url = "https://files.pythonhosted.org/packages/cf/f8/153fd06b5160f0cd27c8b9dd797fcc9fb56ac6a0ebf3c1f765b6b68d3c8a/matplotlib-3.10.5-cp314-cp314-win_arm64.whl", hash = "sha256:21a95b9bf408178d372814de7baacd61c712a62cae560b5e6f35d791776f6516", size = 8108898, upload-time = "2025-07-31T18:09:05.231Z" }, + { url = "https://files.pythonhosted.org/packages/9a/ee/c4b082a382a225fe0d2a73f1f57cf6f6f132308805b493a54c8641006238/matplotlib-3.10.5-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a6b310f95e1102a8c7c817ef17b60ee5d1851b8c71b63d9286b66b177963039e", size = 8295636, upload-time = "2025-07-31T18:09:07.306Z" }, + { url = "https://files.pythonhosted.org/packages/30/73/2195fa2099718b21a20da82dfc753bf2af58d596b51aefe93e359dd5915a/matplotlib-3.10.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:94986a242747a0605cb3ff1cb98691c736f28a59f8ffe5175acaeb7397c49a5a", size = 8158575, upload-time = "2025-07-31T18:09:09.083Z" }, + { url = 
"https://files.pythonhosted.org/packages/f6/e9/a08cdb34618a91fa08f75e6738541da5cacde7c307cea18ff10f0d03fcff/matplotlib-3.10.5-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ff10ea43288f0c8bab608a305dc6c918cc729d429c31dcbbecde3b9f4d5b569", size = 9522815, upload-time = "2025-07-31T18:09:11.191Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/34d8b7e0d1bb6d06ef45db01dfa560d5a67b1c40c0b998ce9ccde934bb09/matplotlib-3.10.5-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f6adb644c9d040ffb0d3434e440490a66cf73dbfa118a6f79cd7568431f7a012", size = 9783514, upload-time = "2025-07-31T18:09:13.307Z" }, + { url = "https://files.pythonhosted.org/packages/12/09/d330d1e55dcca2e11b4d304cc5227f52e2512e46828d6249b88e0694176e/matplotlib-3.10.5-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4fa40a8f98428f789a9dcacd625f59b7bc4e3ef6c8c7c80187a7a709475cf592", size = 9573932, upload-time = "2025-07-31T18:09:15.335Z" }, + { url = "https://files.pythonhosted.org/packages/eb/3b/f70258ac729aa004aca673800a53a2b0a26d49ca1df2eaa03289a1c40f81/matplotlib-3.10.5-cp314-cp314t-win_amd64.whl", hash = "sha256:95672a5d628b44207aab91ec20bf59c26da99de12b88f7e0b1fb0a84a86ff959", size = 8322003, upload-time = "2025-07-31T18:09:17.416Z" }, + { url = "https://files.pythonhosted.org/packages/5b/60/3601f8ce6d76a7c81c7f25a0e15fde0d6b66226dd187aa6d2838e6374161/matplotlib-3.10.5-cp314-cp314t-win_arm64.whl", hash = "sha256:2efaf97d72629e74252e0b5e3c46813e9eeaa94e011ecf8084a971a31a97f40b", size = 8153849, upload-time = "2025-07-31T18:09:19.673Z" }, ] [[package]] name = "mdit-py-plugins" -version = "0.4.2" +version = "0.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/03/a2ecab526543b152300717cf232bb4bb8605b6edb946c845016fa9c9c9fd/mdit_py_plugins-0.4.2.tar.gz", hash = 
"sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5", size = 43542, upload-time = "2024-09-09T20:27:49.564Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/fd/a756d36c0bfba5f6e39a1cdbdbfdd448dc02692467d83816dff4592a1ebc/mdit_py_plugins-0.5.0.tar.gz", hash = "sha256:f4918cb50119f50446560513a8e311d574ff6aaed72606ddae6d35716fe809c6", size = 44655, upload-time = "2025-08-11T07:25:49.083Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/f7/7782a043553ee469c1ff49cfa1cdace2d6bf99a1f333cf38676b3ddf30da/mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636", size = 55316, upload-time = "2024-09-09T20:27:48.397Z" }, + { url = "https://files.pythonhosted.org/packages/fb/86/dd6e5db36df29e76c7a7699123569a4a18c1623ce68d826ed96c62643cae/mdit_py_plugins-0.5.0-py3-none-any.whl", hash = "sha256:07a08422fc1936a5d26d146759e9155ea466e842f5ab2f7d2266dd084c8dab1f", size = 57205, upload-time = "2025-08-11T07:25:47.597Z" }, ] [[package]] @@ -2138,6 +2596,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "megatron-bridge" +source = { editable = "3rdparty/Megatron-Bridge-workspace" } +dependencies = [ + { name = "causal-conv1d" }, + { name = "datasets" }, + { name = "hydra-core" }, + { name = "mamba-ssm" }, + { name = "megatron-core" }, + { name = "nvidia-resiliency-ext" }, + { name = "omegaconf" }, + { name = "pyyaml" }, + { name = "qwen-vl-utils" }, + { name = "regex" }, + { name = "rich" }, + { name = "six" }, + { name = "tensorboard" }, + { name = "tqdm" }, + { name = "transformer-engine", extra = ["pytorch"] }, + { name = "typing-extensions" }, + { name = "wandb" }, +] + +[package.metadata] 
+requires-dist = [ + { name = "causal-conv1d", git = "https://github.com/Dao-AILab/causal-conv1d?tag=v1.5.0.post8" }, + { name = "datasets" }, + { name = "hydra-core", specifier = ">1.3,<=1.3.2" }, + { name = "mamba-ssm", git = "https://github.com/state-spaces/mamba.git?rev=2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" }, + { name = "megatron-core", extras = ["dev", "mlm"], editable = "3rdparty/Megatron-LM-workspace" }, + { name = "nvidia-resiliency-ext" }, + { name = "omegaconf", specifier = ">=2.3.0" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "qwen-vl-utils" }, + { name = "regex", specifier = ">=2024.11.6" }, + { name = "rich" }, + { name = "six", specifier = ">=1.17.0" }, + { name = "tensorboard", specifier = ">=2.19.0" }, + { name = "tqdm", specifier = ">=4.67.1" }, + { name = "transformer-engine", extras = ["pytorch"], specifier = ">=2.9.0a0,<2.10.0" }, + { name = "typing-extensions" }, + { name = "wandb", specifier = ">=0.19.10" }, +] + [[package]] name = "megatron-core" source = { editable = "3rdparty/Megatron-LM-workspace" } @@ -2145,16 +2647,18 @@ dependencies = [ { name = "einops" }, { name = "flask-restful" }, { name = "nltk" }, - { name = "nvidia-modelopt", extra = ["torch"], marker = "sys_platform != 'darwin'" }, + { name = "nvidia-modelopt", marker = "sys_platform != 'darwin'" }, { name = "packaging" }, { name = "pytest" }, { name = "pytest-cov" }, { name = "pytest-mock" }, { name = "pytest-random-order" }, { name = "sentencepiece" }, - { name = "tensorstore" }, + { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "tensorstore", version = "0.1.76", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "tiktoken" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = 
"2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, { name = "wandb" }, { name = "wrapt" }, { name = "zarr" }, @@ -2174,70 +2678,137 @@ requires-dist = [ { name = "sentencepiece" }, { name = "tensorstore", specifier = "!=0.1.46,!=0.1.72" }, { name = "tiktoken" }, - { name = "torch", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", marker = "sys_platform != 'darwin'", index = "https://download.pytorch.org/whl/cu129" }, + { name = "torch", marker = "sys_platform == 'darwin'", index = "https://pypi.org/simple" }, { name = "wandb" }, { name = "wrapt" }, { name = "zarr" }, ] +[[package]] +name = "megatron-fsdp" +version = "0.1.0rc1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a0/be/06ada3d765ebca304e2d87873d6cf00807b43155ed57058abcd813d13a5d/megatron_fsdp-0.1.0rc1.tar.gz", hash = "sha256:4852a1c62bb95b5fc9567165ee7119f2e68bc75d6103af06bd1e6d392a50021f", size = 71600, upload-time = "2025-09-02T21:29:10.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/27/26ac0642311ef4690b70718cf482c2b83ea91770cb73056c7aa1f06f8857/megatron_fsdp-0.1.0rc1-py3-none-any.whl", hash = "sha256:c790b31b34de278e2c0fb07aa9eaa7edbdd55492005e857c55bee1450ffd03c9", size = 75936, upload-time = "2025-09-08T04:17:06.049Z" }, +] + [[package]] name = "mistral-common" -version = "1.6.2" +version = "1.8.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema" }, { name = "numpy" }, { name = "pillow" }, { name = "pydantic" }, + { name = 
"pydantic-extra-types", extra = ["pycountry"] }, { name = "requests" }, - { name = "sentencepiece" }, { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8c/ce/b82f260858f8971634b61c4fead2def5ad658ed5ed1c2f3dcadf198816c5/mistral_common-1.6.2.tar.gz", hash = "sha256:273605f0969cfaf1297af44c05c071f271fa193d28d83c43a1d7bfe08239a56e", size = 6298853, upload-time = "2025-06-12T15:20:06.396Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ba/dd/1beb1e3d56300f0e4b45ba975ffa7f4b07e6f96a6e06601483f58931893b/mistral_common-1.8.4.tar.gz", hash = "sha256:e611c16ef59c2b60ffdecef4d5e9158e1bf838fad6bad34aa050123601af703a", size = 6333167, upload-time = "2025-08-20T07:22:26.347Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/4c/e8/4841d38a3a5e8a06a2903f553367951013c867a94b42adf67bcf2401d9fc/mistral_common-1.6.2-py3-none-any.whl", hash = "sha256:9fd2f54907374f1dbd7cdfa12c9ddabad8d7a39da2d9ebd15d80ae2d2dab5312", size = 6490291, upload-time = "2025-06-12T15:20:02.326Z" }, + { url = "https://files.pythonhosted.org/packages/d6/4f/756a66c608a767c7af7010b23992343e97558ce7f86c5c15929f1215f6ef/mistral_common-1.8.4-py3-none-any.whl", hash = "sha256:bfaf2550046cebe8289946adc267ba807ac266e5325647af4c4f67292124bc2f", size = 6517094, upload-time = "2025-08-20T07:22:23.686Z" }, ] [package.optional-dependencies] +audio = [ + { name = "soundfile" }, + { name = "soxr" }, +] +image = [ + { name = "opencv-python-headless" }, +] opencv = [ { name = "opencv-python-headless" }, ] [[package]] name = "ml-dtypes" -version = "0.5.1" +version = "0.4.1" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and 
sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "numpy" }, + { name = "numpy", marker = "python_full_version >= '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/15/76f86faa0902836cc133939732f7611ace68cf54148487a99c539c272dc8/ml_dtypes-0.4.1.tar.gz", hash = "sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a", size = 692594, upload-time = "2024-09-13T19:07:11.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/1a/99e924f12e4b62139fbac87419698c65f956d58de0dbfa7c028fa5b096aa/ml_dtypes-0.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:827d3ca2097085cf0355f8fdf092b888890bb1b1455f52801a2d7756f056f54b", size = 405077, upload-time = "2024-09-13T19:06:57.538Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8c/7b610bd500617854c8cc6ed7c8cfb9d48d6a5c21a1437a36a4b9bc8a3598/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:772426b08a6172a891274d581ce58ea2789cc8abc1c002a27223f314aaf894e7", size = 2181554, upload-time = "2024-09-13T19:06:59.196Z" }, + { url = "https://files.pythonhosted.org/packages/c7/c6/f89620cecc0581dc1839e218c4315171312e46c62a62da6ace204bda91c0/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:126e7d679b8676d1a958f2651949fbfa182832c3cd08020d8facd94e4114f3e9", size = 2160488, upload-time = "2024-09-13T19:07:03.131Z" }, + { url = "https://files.pythonhosted.org/packages/ae/11/a742d3c31b2cc8557a48efdde53427fd5f9caa2fa3c9c27d826e78a66f51/ml_dtypes-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:df0fb650d5c582a9e72bb5bd96cfebb2cdb889d89daff621c8fbc60295eba66c", size = 
127462, upload-time = "2024-09-13T19:07:04.916Z" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/32/49/6e67c334872d2c114df3020e579f3718c333198f8312290e09ec0216703a/ml_dtypes-0.5.1.tar.gz", hash = "sha256:ac5b58559bb84a95848ed6984eb8013249f90b6bab62aa5acbad876e256002c9", size = 698772, upload-time = "2025-01-07T03:34:55.613Z" } + +[[package]] +name = "ml-dtypes" +version = "0.5.3" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] +dependencies = [ + { name = "numpy", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/a7/aad060393123cfb383956dca68402aff3db1e1caffd5764887ed5153f41b/ml_dtypes-0.5.3.tar.gz", hash = "sha256:95ce33057ba4d05df50b1f3cfefab22e351868a843b3b15a46c65836283670c9", size = 692316, upload-time = "2025-07-29T18:39:19.454Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/47/56/1bb21218e1e692506c220ffabd456af9733fba7aa1b14f73899979f4cc20/ml_dtypes-0.5.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6f462f5eca22fb66d7ff9c4744a3db4463af06c49816c4b6ac89b16bfcdc592e", size = 670372, upload-time = "2025-01-07T03:34:15.258Z" }, - { url = "https://files.pythonhosted.org/packages/20/95/d8bd96a3b60e00bf31bd78ca4bdd2d6bbaf5acb09b42844432d719d34061/ml_dtypes-0.5.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:6f76232163b5b9c34291b54621ee60417601e2e4802a188a0ea7157cd9b323f4", size = 4635946, upload-time = "2025-01-07T03:34:20.412Z" }, - { url = "https://files.pythonhosted.org/packages/08/57/5d58fad4124192b1be42f68bd0c0ddaa26e44a730ff8c9337adade2f5632/ml_dtypes-0.5.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad4953c5eb9c25a56d11a913c2011d7e580a435ef5145f804d98efa14477d390", size = 4694804, upload-time = "2025-01-07T03:34:23.608Z" }, - { url = "https://files.pythonhosted.org/packages/38/bc/c4260e4a6c6bf684d0313308de1c860467275221d5e7daf69b3fcddfdd0b/ml_dtypes-0.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:9626d0bca1fb387d5791ca36bacbba298c5ef554747b7ebeafefb4564fc83566", size = 210853, upload-time = "2025-01-07T03:34:26.027Z" }, - { url = "https://files.pythonhosted.org/packages/0f/92/bb6a3d18e16fddd18ce6d5f480e1919b33338c70e18cba831c6ae59812ee/ml_dtypes-0.5.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:12651420130ee7cc13059fc56dac6ad300c3af3848b802d475148c9defd27c23", size = 667696, upload-time = "2025-01-07T03:34:27.526Z" }, - { url = "https://files.pythonhosted.org/packages/6d/29/cfc89d842767e9a51146043b0fa18332c2b38f8831447e6cb1160e3c6102/ml_dtypes-0.5.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9945669d3dadf8acb40ec2e57d38c985d8c285ea73af57fc5b09872c516106d", size = 4638365, upload-time = "2025-01-07T03:34:30.43Z" }, - { url = "https://files.pythonhosted.org/packages/be/26/adc36e3ea09603d9f6d114894e1c1b7b8e8a9ef6d0b031cc270c6624a37c/ml_dtypes-0.5.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf9975bda82a99dc935f2ae4c83846d86df8fd6ba179614acac8e686910851da", size = 4702722, upload-time = "2025-01-07T03:34:33.813Z" }, - { url = "https://files.pythonhosted.org/packages/da/8a/a2b9375c94077e5a488a624a195621407846f504068ce22ccf805c674156/ml_dtypes-0.5.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:fd918d4e6a4e0c110e2e05be7a7814d10dc1b95872accbf6512b80a109b71ae1", size = 210850, upload-time = "2025-01-07T03:34:36.897Z" }, - { url = "https://files.pythonhosted.org/packages/52/38/703169100fdde27957f061d4d0ea3e00525775a09acaccf7e655d9609d55/ml_dtypes-0.5.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:05f23447a1c20ddf4dc7c2c661aa9ed93fcb2658f1017c204d1e758714dc28a8", size = 693043, upload-time = "2025-01-07T03:34:38.457Z" }, - { url = "https://files.pythonhosted.org/packages/28/ff/4e234c9c23e0d456f5da5a326c103bf890c746d93351524d987e41f438b3/ml_dtypes-0.5.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b7fbe5571fdf28fd3aaab3ef4aafc847de9ebf263be959958c1ca58ec8eadf5", size = 4903946, upload-time = "2025-01-07T03:34:40.236Z" }, - { url = "https://files.pythonhosted.org/packages/b7/45/c1a1ccfdd02bc4173ca0f4a2d327683a27df85797b885eb1da1ca325b85c/ml_dtypes-0.5.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d13755f8e8445b3870114e5b6240facaa7cb0c3361e54beba3e07fa912a6e12b", size = 5052731, upload-time = "2025-01-07T03:34:45.308Z" }, + { url = "https://files.pythonhosted.org/packages/0d/eb/bc07c88a6ab002b4635e44585d80fa0b350603f11a2097c9d1bfacc03357/ml_dtypes-0.5.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:156418abeeda48ea4797db6776db3c5bdab9ac7be197c1233771e0880c304057", size = 663864, upload-time = "2025-07-29T18:38:33.777Z" }, + { url = "https://files.pythonhosted.org/packages/cf/89/11af9b0f21b99e6386b6581ab40fb38d03225f9de5f55cf52097047e2826/ml_dtypes-0.5.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1db60c154989af253f6c4a34e8a540c2c9dce4d770784d426945e09908fbb177", size = 4951313, upload-time = "2025-07-29T18:38:36.45Z" }, + { url = "https://files.pythonhosted.org/packages/d8/a9/b98b86426c24900b0c754aad006dce2863df7ce0bb2bcc2c02f9cc7e8489/ml_dtypes-0.5.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:1b255acada256d1fa8c35ed07b5f6d18bc21d1556f842fbc2d5718aea2cd9e55", size = 4928805, upload-time = "2025-07-29T18:38:38.29Z" }, + { url = "https://files.pythonhosted.org/packages/50/c1/85e6be4fc09c6175f36fb05a45917837f30af9a5146a5151cb3a3f0f9e09/ml_dtypes-0.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:da65e5fd3eea434ccb8984c3624bc234ddcc0d9f4c81864af611aaebcc08a50e", size = 208182, upload-time = "2025-07-29T18:38:39.72Z" }, + { url = "https://files.pythonhosted.org/packages/9e/17/cf5326d6867be057f232d0610de1458f70a8ce7b6290e4b4a277ea62b4cd/ml_dtypes-0.5.3-cp312-cp312-win_arm64.whl", hash = "sha256:8bb9cd1ce63096567f5f42851f5843b5a0ea11511e50039a7649619abfb4ba6d", size = 161560, upload-time = "2025-07-29T18:38:41.072Z" }, + { url = "https://files.pythonhosted.org/packages/2d/87/1bcc98a66de7b2455dfb292f271452cac9edc4e870796e0d87033524d790/ml_dtypes-0.5.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5103856a225465371fe119f2fef737402b705b810bd95ad5f348e6e1a6ae21af", size = 663781, upload-time = "2025-07-29T18:38:42.984Z" }, + { url = "https://files.pythonhosted.org/packages/fd/2c/bd2a79ba7c759ee192b5601b675b180a3fd6ccf48ffa27fe1782d280f1a7/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4cae435a68861660af81fa3c5af16b70ca11a17275c5b662d9c6f58294e0f113", size = 4956217, upload-time = "2025-07-29T18:38:44.65Z" }, + { url = "https://files.pythonhosted.org/packages/14/f3/091ba84e5395d7fe5b30c081a44dec881cd84b408db1763ee50768b2ab63/ml_dtypes-0.5.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6936283b56d74fbec431ca57ce58a90a908fdbd14d4e2d22eea6d72bb208a7b7", size = 4933109, upload-time = "2025-07-29T18:38:46.405Z" }, + { url = "https://files.pythonhosted.org/packages/bc/24/054036dbe32c43295382c90a1363241684c4d6aaa1ecc3df26bd0c8d5053/ml_dtypes-0.5.3-cp313-cp313-win_amd64.whl", hash = "sha256:d0f730a17cf4f343b2c7ad50cee3bd19e969e793d2be6ed911f43086460096e4", size = 208187, 
upload-time = "2025-07-29T18:38:48.24Z" }, + { url = "https://files.pythonhosted.org/packages/a6/3d/7dc3ec6794a4a9004c765e0c341e32355840b698f73fd2daff46f128afc1/ml_dtypes-0.5.3-cp313-cp313-win_arm64.whl", hash = "sha256:2db74788fc01914a3c7f7da0763427280adfc9cd377e9604b6b64eb8097284bd", size = 161559, upload-time = "2025-07-29T18:38:50.493Z" }, + { url = "https://files.pythonhosted.org/packages/12/91/e6c7a0d67a152b9330445f9f0cf8ae6eee9b83f990b8c57fe74631e42a90/ml_dtypes-0.5.3-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:93c36a08a6d158db44f2eb9ce3258e53f24a9a4a695325a689494f0fdbc71770", size = 689321, upload-time = "2025-07-29T18:38:52.03Z" }, + { url = "https://files.pythonhosted.org/packages/9e/6c/b7b94b84a104a5be1883305b87d4c6bd6ae781504474b4cca067cb2340ec/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0e44a3761f64bc009d71ddb6d6c71008ba21b53ab6ee588dadab65e2fa79eafc", size = 5274495, upload-time = "2025-07-29T18:38:53.797Z" }, + { url = "https://files.pythonhosted.org/packages/5b/38/6266604dffb43378055394ea110570cf261a49876fc48f548dfe876f34cc/ml_dtypes-0.5.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdf40d2aaabd3913dec11840f0d0ebb1b93134f99af6a0a4fd88ffe924928ab4", size = 5285422, upload-time = "2025-07-29T18:38:56.603Z" }, + { url = "https://files.pythonhosted.org/packages/7c/88/8612ff177d043a474b9408f0382605d881eeb4125ba89d4d4b3286573a83/ml_dtypes-0.5.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:aec640bd94c4c85c0d11e2733bd13cbb10438fb004852996ec0efbc6cacdaf70", size = 661182, upload-time = "2025-07-29T18:38:58.414Z" }, + { url = "https://files.pythonhosted.org/packages/6f/2b/0569a5e88b29240d373e835107c94ae9256fb2191d3156b43b2601859eff/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bda32ce212baa724e03c68771e5c69f39e584ea426bfe1a701cb01508ffc7035", size = 4956187, upload-time = "2025-07-29T18:39:00.611Z" }, 
+ { url = "https://files.pythonhosted.org/packages/51/66/273c2a06ae44562b104b61e6b14444da00061fd87652506579d7eb2c40b1/ml_dtypes-0.5.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c205cac07d24a29840c163d6469f61069ce4b065518519216297fc2f261f8db9", size = 4930911, upload-time = "2025-07-29T18:39:02.405Z" }, + { url = "https://files.pythonhosted.org/packages/93/ab/606be3e87dc0821bd360c8c1ee46108025c31a4f96942b63907bb441b87d/ml_dtypes-0.5.3-cp314-cp314-win_amd64.whl", hash = "sha256:cd7c0bb22d4ff86d65ad61b5dd246812e8993fbc95b558553624c33e8b6903ea", size = 216664, upload-time = "2025-07-29T18:39:03.927Z" }, + { url = "https://files.pythonhosted.org/packages/30/a2/e900690ca47d01dffffd66375c5de8c4f8ced0f1ef809ccd3b25b3e6b8fa/ml_dtypes-0.5.3-cp314-cp314-win_arm64.whl", hash = "sha256:9d55ea7f7baf2aed61bf1872116cefc9d0c3693b45cae3916897ee27ef4b835e", size = 160203, upload-time = "2025-07-29T18:39:05.671Z" }, + { url = "https://files.pythonhosted.org/packages/53/21/783dfb51f40d2660afeb9bccf3612b99f6a803d980d2a09132b0f9d216ab/ml_dtypes-0.5.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:e12e29764a0e66a7a31e9b8bf1de5cc0423ea72979f45909acd4292de834ccd3", size = 689324, upload-time = "2025-07-29T18:39:07.567Z" }, + { url = "https://files.pythonhosted.org/packages/09/f7/a82d249c711abf411ac027b7163f285487f5e615c3e0716c61033ce996ab/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:19f6c3a4f635c2fc9e2aa7d91416bd7a3d649b48350c51f7f715a09370a90d93", size = 5275917, upload-time = "2025-07-29T18:39:09.339Z" }, + { url = "https://files.pythonhosted.org/packages/7f/3c/541c4b30815ab90ebfbb51df15d0b4254f2f9f1e2b4907ab229300d5e6f2/ml_dtypes-0.5.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5ab039ffb40f3dc0aeeeba84fd6c3452781b5e15bef72e2d10bcb33e4bbffc39", size = 5285284, upload-time = "2025-07-29T18:39:11.532Z" }, ] [[package]] name = "mlflow" -version = "3.1.1" +version 
= "3.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "alembic" }, + { name = "cryptography" }, { name = "docker" }, { name = "flask" }, + { name = "flask-cors" }, { name = "graphene" }, { name = "gunicorn", marker = "sys_platform != 'win32'" }, { name = "matplotlib" }, { name = "mlflow-skinny" }, + { name = "mlflow-tracing" }, { name = "numpy" }, { name = "pandas" }, { name = "pyarrow" }, @@ -2246,14 +2817,14 @@ dependencies = [ { name = "sqlalchemy" }, { name = "waitress", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/2b/e1/0cba7a8fc2c81078b4d31948f65fb1580cee1831e955a86028159724d057/mlflow-3.1.1.tar.gz", hash = "sha256:ee98fe929d61625b72ae5010fbf12a7c6d15527790397827191fd6e8246c33e5", size = 24098836, upload-time = "2025-06-25T09:12:56.416Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/7e/516ba65bfa6f5857904ce18bcb738234004663dae1197cee082d48f1ad29/mlflow-3.5.1.tar.gz", hash = "sha256:32630f2aaadeb6dc6ccbde56247a1500518b38d0a7cc12f714be1703b6ee3ea1", size = 8300179, upload-time = "2025-10-22T18:11:47.263Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a2/07/9f28e7e2b1c9552e64e6161cd3943b02349f8164176cea6b75e69d7df94a/mlflow-3.1.1-py3-none-any.whl", hash = "sha256:16853335292217fde203a645fd50f38d5567ce7818587ed5236040418918872e", size = 24673365, upload-time = "2025-06-25T09:12:53.482Z" }, + { url = "https://files.pythonhosted.org/packages/98/e1/33cf2596dfbdfe49c2a4696e4321a90e835faeb46e590980461d1d4ef811/mlflow-3.5.1-py3-none-any.whl", hash = "sha256:ebbf5fef59787161a15f2878f210877a62d54d943ad6cea140621687b2393f85", size = 8773271, upload-time = "2025-10-22T18:11:44.6Z" }, ] [[package]] name = "mlflow-skinny" -version = "3.1.1" +version = "3.5.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cachetools" }, @@ -2264,19 +2835,83 @@ dependencies = [ { name = "gitpython" }, { name = "importlib-metadata" }, { name = 
"opentelemetry-api" }, + { name = "opentelemetry-proto" }, { name = "opentelemetry-sdk" }, { name = "packaging" }, { name = "protobuf" }, { name = "pydantic" }, + { name = "python-dotenv" }, { name = "pyyaml" }, { name = "requests" }, { name = "sqlparse" }, { name = "typing-extensions" }, { name = "uvicorn" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/dd/52/e63c0244a24ed23b5f82b30efffce150c19f126b8ef977b78a56f6d192c9/mlflow_skinny-3.1.1.tar.gz", hash = "sha256:9c2ea510eef6c115c7241305b65f7090d7fdc02399de2a6e8ddae5f285bb7a99", size = 1603411, upload-time = "2025-06-25T05:52:22.717Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/1a/ede3fb7a4085bf640e2842c0a4d3d95ef665b21e6d0e92cfb7867ba58ef7/mlflow_skinny-3.5.1.tar.gz", hash = "sha256:4358a5489221cdecf53cf045e10df28919dedb9489965434ce3445f7cbabf365", size = 1927869, upload-time = "2025-10-22T17:58:41.623Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f6/45/24d553e0f550f82aaadd8b9d08f1410a3d750c51733a5f43fcc6def1be00/mlflow_skinny-3.1.1-py3-none-any.whl", hash = "sha256:73b1be5d0ef3099c2d0e5ec3ca7fd0b85d4a6def7d7ab35feda9f06bf8bf7049", size = 1926660, upload-time = "2025-06-25T05:52:20.556Z" }, + { url = "https://files.pythonhosted.org/packages/b4/88/75690e7cdc6fe56374e24178055bb2a7385e1e29c51a8cbb2fb747892af1/mlflow_skinny-3.5.1-py3-none-any.whl", hash = "sha256:e5f96977d21a093a3ffda789bee90070855dbfe1b9d0703c0c3e34d2f8d7fba8", size = 2314304, upload-time = "2025-10-22T17:58:39.526Z" }, +] + +[[package]] +name = "mlflow-tracing" +version = "3.5.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cachetools" }, + { name = "databricks-sdk" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, + { name = "packaging" }, + { name = "protobuf" }, + { name = "pydantic" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/18/38/ade11b09edfee133078015656aec8a3854f1a6ed1bd6e6d9af333fcdaaf9/mlflow_tracing-3.5.1.tar.gz", hash = "sha256:bca266b1871692ae2ec812ed177cdc108ccef1cb3fb82725a8b959ec98d5fba0", size = 1056089, upload-time = "2025-10-22T17:56:12.047Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/7f/99006f6c261ef694363e8599ad858c223aa9918231e8bd7a1569041967ac/mlflow_tracing-3.5.1-py3-none-any.whl", hash = "sha256:4fd685347158e0d2c48f5bec3d15ecfc6fadc1dbb48073cb220ded438408fa65", size = 1273904, upload-time = "2025-10-22T17:56:10.748Z" }, +] + +[[package]] +name = "mlx" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mlx-metal", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f2/c9/d12ed6a8393450e28eb1f552b50200f83f138b1268b5f4e8074a76d745a2/mlx-0.28.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:97866d5e454e8f2d7bc42aadcbfd7565d40f4755564785e4fb964812fbad604b", size = 564160, upload-time = "2025-08-07T07:50:34.652Z" }, + { url = "https://files.pythonhosted.org/packages/71/4f/3951766a5edb75c0d2d860381f592d271b4c3b7241e730e78dd63926f5b4/mlx-0.28.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:5204ebf399439e5da374295f6c1b6961355824604eed7026c18edfe4c83e9243", size = 540098, upload-time = "2025-08-07T07:50:52.67Z" }, + { url = "https://files.pythonhosted.org/packages/f7/52/cb8eb03544eace055a500bd4a3b776a3ce48198d7b7b398e21a5a3256e89/mlx-0.28.0-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:34776bd3fe97bca7c6c76d77f6104e0d6b05b3626bb3cf9ed48d3a9bbd46c180", size = 540100, upload-time = "2025-08-07T07:50:49.095Z" }, + { url = "https://files.pythonhosted.org/packages/cd/fb/795f3540057642bcf3a95fe7d17c14ffaca2102511328eee6cd92d49223e/mlx-0.28.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:78c88e5cc4188f538935b23803e10eaf084caa8bfeaa2a6de983038ecee3fd78", size = 564139, upload-time = 
"2025-08-07T07:50:31.487Z" }, + { url = "https://files.pythonhosted.org/packages/7e/4a/39609e5e3fea14c429e8a61f9754e61e4ed5289422223ad213df9116fd55/mlx-0.28.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0b7a57a584ea5e807ec0a17c4eb179a71e01eeff9f25dff6950abad1e30443c2", size = 540205, upload-time = "2025-08-07T07:50:47.284Z" }, + { url = "https://files.pythonhosted.org/packages/43/af/738ea855df6742a4ac4ee1c72f298ff6cf50f0af7e553e89a1a41060c12c/mlx-0.28.0-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:a7cdcbd3faff45c18e9f51f95e9aa9410c71bbb4d5d86878a97eb996a0467505", size = 540201, upload-time = "2025-08-07T07:50:45.122Z" }, +] + +[[package]] +name = "mlx-lm" +version = "0.26.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "mlx", marker = "sys_platform == 'darwin'" }, + { name = "numpy", marker = "sys_platform == 'darwin'" }, + { name = "protobuf", marker = "sys_platform == 'darwin'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin'" }, + { name = "transformers", marker = "sys_platform == 'darwin'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/4b/ed8ec01f182203b0897415a9d20f0cd8a141def77ad43deea18ffaba4c9c/mlx_lm-0.26.3.tar.gz", hash = "sha256:06cd74ee3eea920335c528e68feb854eede45fe4e5f149b464ac100c1dbeaded", size = 172096, upload-time = "2025-08-06T21:48:22.762Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/ff/142ba3ec53282e179bab3ba5608c5edec3b419bcc816df63c141bcc6e2e9/mlx_lm-0.26.3-py3-none-any.whl", hash = "sha256:c6a9e44bd707822bc165ce638723ab11252e8334b7b3bf79c7d399c8c3d6d48e", size = 235187, upload-time = "2025-08-06T21:48:21.73Z" }, +] + +[[package]] +name = "mlx-metal" +version = "0.28.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/f1/71/879284c71dfb12ded986a6532a4ab7df5c2794385ccf2766c1b40aee74cb/mlx_metal-0.28.0-py3-none-macosx_13_0_arm64.whl", hash = "sha256:ce08d40f1fad4f0b3bc87bfff5d603c7fe7dd141c082ba9ce9328b41e8f8d46b", size = 33840007, upload-time = "2025-08-07T07:53:07.437Z" }, + { url = "https://files.pythonhosted.org/packages/06/90/44a261ccb9f6052c93c9da4faa4fc6d4f914938c51ecbbb68c546ab521b9/mlx_metal-0.28.0-py3-none-macosx_14_0_arm64.whl", hash = "sha256:424142ab843e2ac0b14edb58cf88d96723823c565291f46ddeeaa072abcc991e", size = 33196759, upload-time = "2025-08-07T07:52:59.436Z" }, + { url = "https://files.pythonhosted.org/packages/72/59/8e4dee2893a56fc68a27eec7ec7ed9559c7ea01099313a9b8196373bf3cf/mlx_metal-0.28.0-py3-none-macosx_15_0_arm64.whl", hash = "sha256:214ece3781d44f57eb9686561594b28915ec5568df4a5a73da59c66880b204ed", size = 33167706, upload-time = "2025-08-07T07:53:03.852Z" }, ] [[package]] @@ -2340,59 +2975,65 @@ wheels = [ [[package]] name = "multidict" -version = "6.5.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5c/43/2d90c414d9efc4587d6e7cebae9f2c2d8001bcb4f89ed514ae837e9dcbe6/multidict-6.5.1.tar.gz", hash = "sha256:a835ea8103f4723915d7d621529c80ef48db48ae0c818afcabe0f95aa1febc3a", size = 98690, upload-time = "2025-06-24T22:16:05.117Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/33/36/225fb9b890607d740f61957febf622f5c9cd9e641a93502c7877934d57ef/multidict-6.5.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:48f95fe064f63d9601ef7a3dce2fc2a437d5fcc11bca960bc8be720330b13b6a", size = 74287, upload-time = "2025-06-24T22:14:29.456Z" }, - { url = "https://files.pythonhosted.org/packages/70/e5/c9eabb16ecf77275664413263527ab169e08371dfa6b168025d8f67261fd/multidict-6.5.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7b7b6e1ce9b61f721417c68eeeb37599b769f3b631e6b25c21f50f8f619420b9", size = 44092, upload-time = "2025-06-24T22:14:30.686Z" 
}, - { url = "https://files.pythonhosted.org/packages/df/0b/dd9322a432c477a2e6d089bbb53acb68ed25515b8292dbc60f27e7e45d70/multidict-6.5.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8b83b055889bda09fc866c0a652cdb6c36eeeafc2858259c9a7171fe82df5773", size = 42565, upload-time = "2025-06-24T22:14:31.8Z" }, - { url = "https://files.pythonhosted.org/packages/f9/ac/22f5b4e55a4bc99f9622de280f7da366c1d7f29ec4eec9d339cb2ba62019/multidict-6.5.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b7bd4d655dc460c7aebb73b58ed1c074e85f7286105b012556cf0f25c6d1dba3", size = 254896, upload-time = "2025-06-24T22:14:32.865Z" }, - { url = "https://files.pythonhosted.org/packages/09/dc/2f6d96d4a80ec731579cb69532fac33cbbda2a838079ae0c47c6e8f5545b/multidict-6.5.1-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:aa6dcf25ced31cdce10f004506dbc26129f28a911b32ed10e54453a0842a6173", size = 236854, upload-time = "2025-06-24T22:14:34.185Z" }, - { url = "https://files.pythonhosted.org/packages/4a/cb/ef38a69ee75e8b72e5cff9ed4cff92379eadd057a99eaf4893494bf6ab64/multidict-6.5.1-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:059fb556c3e6ce1a168496f92ef139ad839a47f898eaa512b1d43e5e05d78c6b", size = 265131, upload-time = "2025-06-24T22:14:35.534Z" }, - { url = "https://files.pythonhosted.org/packages/c0/9e/85d9fe9e658e0edf566c02181248fa2aaf5e53134df0c80f7231ce5fc689/multidict-6.5.1-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f97680c839dd9fa208e9584b1c2a5f1224bd01d31961f7f7d94984408c4a6b9e", size = 262187, upload-time = "2025-06-24T22:14:36.891Z" }, - { url = "https://files.pythonhosted.org/packages/2b/1c/b46ec1dd78c3faa55bffb354410c48fadd81029a144cd056828c82ca15b4/multidict-6.5.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:7710c716243525cc05cd038c6e09f1807ee0fef2510a6e484450712c389c8d7f", size = 251220, upload-time = "2025-06-24T22:14:38.584Z" }, - { url = "https://files.pythonhosted.org/packages/6b/6b/481ec5179ddc7da8b05077ebae2dd51da3df3ae3e5842020fbfa939167c1/multidict-6.5.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:83eb172b4856ffff2814bdcf9c7792c0439302faab1b31376817b067b26cd8f5", size = 249949, upload-time = "2025-06-24T22:14:40.033Z" }, - { url = "https://files.pythonhosted.org/packages/00/e3/642f63e12c1b8e6662c23626a98e9d764fe5a63c3a6cb59002f6fdcb920f/multidict-6.5.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:562d4714fa43f6ebc043a657535e4575e7d6141a818c9b3055f0868d29a1a41b", size = 244438, upload-time = "2025-06-24T22:14:41.464Z" }, - { url = "https://files.pythonhosted.org/packages/dc/cf/797397f6d38b011912504aef213a4be43ef4ec134859caa47f94d810bad8/multidict-6.5.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:2d7def2fc47695c46a427b8f298fb5ace03d635c1fb17f30d6192c9a8fb69e70", size = 259921, upload-time = "2025-06-24T22:14:43.248Z" }, - { url = "https://files.pythonhosted.org/packages/82/b2/ae914a2d84eba21e956fa3727060248ca23ed4a5bf1beb057df0d10f9de3/multidict-6.5.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:77bc8ab5c6bfe696eff564824e73a451fdeca22f3b960261750836cee02bcbfa", size = 252691, upload-time = "2025-06-24T22:14:45.57Z" }, - { url = "https://files.pythonhosted.org/packages/01/fa/1ab4d79a236b871cfd40d36a1f9942906c630bd2b7822287bd3927addb62/multidict-6.5.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9eec51891d3c210948ead894ec1483d48748abec08db5ce9af52cc13fef37aee", size = 246224, upload-time = "2025-06-24T22:14:47.316Z" }, - { url = "https://files.pythonhosted.org/packages/78/dd/bf002fe04e952db73cad8ce10a5b5347358d0d17221aef156e050aff690b/multidict-6.5.1-cp312-cp312-win32.whl", hash = "sha256:189f0c2bd1c0ae5509e453707d0e187e030c9e873a0116d1f32d1c870d0fc347", size = 41354, upload-time = "2025-06-24T22:14:48.567Z" 
}, - { url = "https://files.pythonhosted.org/packages/95/ce/508a8487d98fdc3e693755bc19c543a2af293f5ce96da398bd1974efb802/multidict-6.5.1-cp312-cp312-win_amd64.whl", hash = "sha256:e81f23b4b6f2a588f15d5cb554b2d8b482bb6044223d64b86bc7079cae9ebaad", size = 45072, upload-time = "2025-06-24T22:14:50.898Z" }, - { url = "https://files.pythonhosted.org/packages/ae/da/4782cf2f274d0d56fff6c07fc5cc5a14acf821dec08350c17d66d0207a05/multidict-6.5.1-cp312-cp312-win_arm64.whl", hash = "sha256:79d13e06d5241f9c8479dfeaf0f7cce8f453a4a302c9a0b1fa9b1a6869ff7757", size = 42149, upload-time = "2025-06-24T22:14:53.138Z" }, - { url = "https://files.pythonhosted.org/packages/19/3f/c2e07031111d2513d260157933a8697ad52a935d8a2a2b8b7b317ddd9a96/multidict-6.5.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:98011312f36d1e496f15454a95578d1212bc2ffc25650a8484752b06d304fd9b", size = 73588, upload-time = "2025-06-24T22:14:54.332Z" }, - { url = "https://files.pythonhosted.org/packages/95/bb/f47aa21827202a9f889fd66de9a1db33d0e4bbaaa2567156e4efb3cc0e5e/multidict-6.5.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bae589fb902b47bd94e6f539b34eefe55a1736099f616f614ec1544a43f95b05", size = 43756, upload-time = "2025-06-24T22:14:55.748Z" }, - { url = "https://files.pythonhosted.org/packages/9f/ec/24549de092c9b0bc3167e0beb31a11be58e8595dbcfed2b7821795bb3923/multidict-6.5.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6eb3bf26cd94eb306e4bc776d0964cc67a7967e4ad9299309f0ff5beec3c62be", size = 42222, upload-time = "2025-06-24T22:14:57.418Z" }, - { url = "https://files.pythonhosted.org/packages/13/45/54452027ebc0ba660667aab67ae11afb9aaba91f4b5d63cddef045279d94/multidict-6.5.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e5e1a5a99c72d1531501406fcc06b6bf699ebd079dacd6807bb43fc0ff260e5c", size = 253014, upload-time = "2025-06-24T22:14:58.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/3c/76e7b4c0ce3a8bb43efca679674fba421333fbc8429134072db80e13dcb8/multidict-6.5.1-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:38755bcba18720cb2338bea23a5afcff234445ee75fa11518f6130e22f2ab970", size = 235939, upload-time = "2025-06-24T22:15:00.138Z" }, - { url = "https://files.pythonhosted.org/packages/86/ce/48e3123a9af61ff2f60e3764b0b15cf4fca22b1299aac281252ac3a590d6/multidict-6.5.1-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f42fef9bcba3c32fd4e4a23c5757fc807d218b249573aaffa8634879f95feb73", size = 262940, upload-time = "2025-06-24T22:15:01.52Z" }, - { url = "https://files.pythonhosted.org/packages/b3/ab/bccd739faf87051b55df619a0967c8545b4d4a4b90258c5f564ab1752f15/multidict-6.5.1-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:071b962f4cc87469cda90c7cc1c077b76496878b39851d7417a3d994e27fe2c6", size = 260652, upload-time = "2025-06-24T22:15:02.988Z" }, - { url = "https://files.pythonhosted.org/packages/9a/9c/01f654aad28a5d0d74f2678c1541ae15e711f99603fd84c780078205966e/multidict-6.5.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:627ba4b7ce7c0115981f0fd91921f5d101dfb9972622178aeef84ccce1c2bbf3", size = 250011, upload-time = "2025-06-24T22:15:04.317Z" }, - { url = "https://files.pythonhosted.org/packages/5c/bc/edf08906e1db7385c6bf36e4179957307f50c44a889493e9b251255be79c/multidict-6.5.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:05dcaed3e5e54f0d0f99a39762b0195274b75016cbf246f600900305581cf1a2", size = 248242, upload-time = "2025-06-24T22:15:06.035Z" }, - { url = "https://files.pythonhosted.org/packages/b7/c3/1ad054b88b889fda8b62ea9634ac7082567e8dc42b9b794a2c565ef102ab/multidict-6.5.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:11f5ecf3e741a18c578d118ad257c5588ca33cc7c46d51c0487d7ae76f072c32", size = 
244683, upload-time = "2025-06-24T22:15:07.731Z" }, - { url = "https://files.pythonhosted.org/packages/57/63/119a76b2095e1bb765816175cafeac7b520f564691abef2572fb80f4f246/multidict-6.5.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:b948eb625411c20b15088fca862c51a39140b9cf7875b5fb47a72bb249fa2f42", size = 257626, upload-time = "2025-06-24T22:15:09.013Z" }, - { url = "https://files.pythonhosted.org/packages/26/a9/b91a76af5ff49bd088ee76d11eb6134227f5ea50bcd5f6738443b2fe8e05/multidict-6.5.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fc993a96dfc8300befd03d03df46efdb1d8d5a46911b014e956a4443035f470d", size = 251077, upload-time = "2025-06-24T22:15:10.366Z" }, - { url = "https://files.pythonhosted.org/packages/2a/fe/b1dc57aaa4de9f5a27543e28bd1f8bff00a316888b7344b5d33258b14b0a/multidict-6.5.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee2d333380f22d35a56c6461f4579cfe186e143cd0b010b9524ac027de2a34cd", size = 244715, upload-time = "2025-06-24T22:15:11.76Z" }, - { url = "https://files.pythonhosted.org/packages/51/55/47a82690f71d0141eea49a623bbcc00a4d28770efc7cba8ead75602c9b90/multidict-6.5.1-cp313-cp313-win32.whl", hash = "sha256:5891e3327e6a426ddd443c87339b967c84feb8c022dd425e0c025fa0fcd71e68", size = 41156, upload-time = "2025-06-24T22:15:13.139Z" }, - { url = "https://files.pythonhosted.org/packages/25/b3/43306e4d7d3a9898574d1dc156b9607540dad581b1d767c992030751b82d/multidict-6.5.1-cp313-cp313-win_amd64.whl", hash = "sha256:fcdaa72261bff25fad93e7cb9bd7112bd4bac209148e698e380426489d8ed8a9", size = 44933, upload-time = "2025-06-24T22:15:14.639Z" }, - { url = "https://files.pythonhosted.org/packages/30/e2/34cb83c8a4e01b28e2abf30dc90178aa63c9db042be22fa02472cb744b86/multidict-6.5.1-cp313-cp313-win_arm64.whl", hash = "sha256:84292145303f354a35558e601c665cdf87059d87b12777417e2e57ba3eb98903", size = 41967, upload-time = "2025-06-24T22:15:15.856Z" }, - { url = 
"https://files.pythonhosted.org/packages/64/08/17d2de9cf749ea9589ecfb7532ab4988e8b113b7624826dba6b7527a58f3/multidict-6.5.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f8316e58db799a1972afbc46770dfaaf20b0847003ab80de6fcb9861194faa3f", size = 80513, upload-time = "2025-06-24T22:15:16.946Z" }, - { url = "https://files.pythonhosted.org/packages/3e/b9/c9392465a21f7dff164633348b4cf66eef55c4ee48bdcdc00f0a71792779/multidict-6.5.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d3468f0db187aca59eb56e0aa9f7c8c5427bcb844ad1c86557b4886aeb4484d8", size = 46854, upload-time = "2025-06-24T22:15:18.116Z" }, - { url = "https://files.pythonhosted.org/packages/2e/24/d79cbed5d0573304bc907dff0e5ad8788a4de891eec832809812b319930e/multidict-6.5.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:228533a5f99f1248cd79f6470779c424d63bc3e10d47c82511c65cc294458445", size = 45724, upload-time = "2025-06-24T22:15:19.241Z" }, - { url = "https://files.pythonhosted.org/packages/ec/22/232be6c077183719c78131f0e3c3d7134eb2d839e6e50e1c1e69e5ef5965/multidict-6.5.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527076fdf5854901b1246c589af9a8a18b4a308375acb0020b585f696a10c794", size = 251895, upload-time = "2025-06-24T22:15:20.564Z" }, - { url = "https://files.pythonhosted.org/packages/57/80/85985e1441864b946e79538355b7b47f36206bf6bbaa2fa6d74d8232f2ab/multidict-6.5.1-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:9a17a17bad5c22f43e6a6b285dd9c16b1e8f8428202cd9bc22adaac68d0bbfed", size = 229357, upload-time = "2025-06-24T22:15:21.949Z" }, - { url = "https://files.pythonhosted.org/packages/b1/14/0024d1428b05aedaeea211da232aa6b6ad5c556a8a38b0942df1e54e1fa5/multidict-6.5.1-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:efd1951edab4a6cb65108d411867811f2b283f4b972337fb4269e40142f7f6a6", size = 259262, upload-time = 
"2025-06-24T22:15:23.455Z" }, - { url = "https://files.pythonhosted.org/packages/b1/cc/3fe63d61ffc9a48d62f36249e228e330144d990ac01f61169b615a3be471/multidict-6.5.1-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c07d5f38b39acb4f8f61a7aa4166d140ed628245ff0441630df15340532e3b3c", size = 257998, upload-time = "2025-06-24T22:15:24.907Z" }, - { url = "https://files.pythonhosted.org/packages/e8/e4/46b38b9a565ccc5d86f55787090670582d51ab0a0d37cfeaf4313b053f7b/multidict-6.5.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a6605dc74cd333be279e1fcb568ea24f7bdf1cf09f83a77360ce4dd32d67f14", size = 247951, upload-time = "2025-06-24T22:15:26.274Z" }, - { url = "https://files.pythonhosted.org/packages/af/78/58a9bc0674401f1f26418cd58a5ebf35ce91ead76a22b578908acfe0f4e2/multidict-6.5.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d64e30ae9ba66ce303a567548a06d64455d97c5dff7052fe428d154274d7174", size = 246786, upload-time = "2025-06-24T22:15:27.695Z" }, - { url = "https://files.pythonhosted.org/packages/66/24/51142ccee295992e22881cccc54b291308423bbcc836fcf4d2edef1a88d0/multidict-6.5.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:2fb5dde79a7f6d98ac5e26a4c9de77ccd2c5224a7ce89aeac6d99df7bbe06464", size = 235030, upload-time = "2025-06-24T22:15:29.391Z" }, - { url = "https://files.pythonhosted.org/packages/4b/9a/a6f7b75460d3e35b16bf7745c9e3ebb3293324a4295e586563bf50d361f4/multidict-6.5.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8a0d22e8b07cf620e9aeb1582340d00f0031e6a1f3e39d9c2dcbefa8691443b4", size = 253964, upload-time = "2025-06-24T22:15:31.689Z" }, - { url = "https://files.pythonhosted.org/packages/3d/f8/0b690674bf8f78604eb0a2b0a85d1380ff3003f270440d40def2a3de8cf4/multidict-6.5.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0120ed5cff2082c7a0ed62a8f80f4f6ac266010c722381816462f279bfa19487", size = 247370, upload-time = 
"2025-06-24T22:15:33.114Z" }, - { url = "https://files.pythonhosted.org/packages/7f/7d/ca55049d1041c517f294c1755c786539cb7a8dc5033361f20ce3a3d817be/multidict-6.5.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3dea06ba27401c4b54317aa04791182dc9295e7aa623732dd459071a0e0f65db", size = 242920, upload-time = "2025-06-24T22:15:34.669Z" }, - { url = "https://files.pythonhosted.org/packages/1e/65/f4afa14f0921751864bb3ef80267f15ecae423483e8da9bc5d3757632bfa/multidict-6.5.1-cp313-cp313t-win32.whl", hash = "sha256:93b21be44f3cfee3be68ed5cd8848a3c0420d76dbd12d74f7776bde6b29e5f33", size = 46968, upload-time = "2025-06-24T22:15:36.023Z" }, - { url = "https://files.pythonhosted.org/packages/00/0a/13d08be1ca1523df515fb4efd3cf10f153e62d533f55c53f543cd73041e8/multidict-6.5.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c5c18f8646a520cc34d00f65f9f6f77782b8a8c59fd8de10713e0de7f470b5d0", size = 52353, upload-time = "2025-06-24T22:15:37.247Z" }, - { url = "https://files.pythonhosted.org/packages/4b/dd/84aaf725b236677597a9570d8c1c99af0ba03712149852347969e014d826/multidict-6.5.1-cp313-cp313t-win_arm64.whl", hash = "sha256:eb27128141474a1d545f0531b496c7c2f1c4beff50cb5a828f36eb62fef16c67", size = 44500, upload-time = "2025-06-24T22:15:38.445Z" }, - { url = "https://files.pythonhosted.org/packages/07/9f/d4719ce55a1d8bf6619e8bb92f1e2e7399026ea85ae0c324ec77ee06c050/multidict-6.5.1-py3-none-any.whl", hash = "sha256:895354f4a38f53a1df2cc3fa2223fa714cff2b079a9f018a76cad35e7f0f044c", size = 12185, upload-time = "2025-06-24T22:16:03.816Z" }, +version = "6.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/7f/0652e6ed47ab288e3756ea9c0df8b14950781184d4bd7883f4d87dd41245/multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd", size = 101843, upload-time = "2025-08-11T12:08:48.217Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/05/f6/512ffd8fd8b37fb2680e5ac35d788f1d71bbaf37789d21a820bdc441e565/multidict-6.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0ffb87be160942d56d7b87b0fdf098e81ed565add09eaa1294268c7f3caac4c8", size = 76516, upload-time = "2025-08-11T12:06:53.393Z" }, + { url = "https://files.pythonhosted.org/packages/99/58/45c3e75deb8855c36bd66cc1658007589662ba584dbf423d01df478dd1c5/multidict-6.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d191de6cbab2aff5de6c5723101705fd044b3e4c7cfd587a1929b5028b9714b3", size = 45394, upload-time = "2025-08-11T12:06:54.555Z" }, + { url = "https://files.pythonhosted.org/packages/fd/ca/e8c4472a93a26e4507c0b8e1f0762c0d8a32de1328ef72fd704ef9cc5447/multidict-6.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38a0956dd92d918ad5feff3db8fcb4a5eb7dba114da917e1a88475619781b57b", size = 43591, upload-time = "2025-08-11T12:06:55.672Z" }, + { url = "https://files.pythonhosted.org/packages/05/51/edf414f4df058574a7265034d04c935aa84a89e79ce90fcf4df211f47b16/multidict-6.6.4-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6865f6d3b7900ae020b495d599fcf3765653bc927951c1abb959017f81ae8287", size = 237215, upload-time = "2025-08-11T12:06:57.213Z" }, + { url = "https://files.pythonhosted.org/packages/c8/45/8b3d6dbad8cf3252553cc41abea09ad527b33ce47a5e199072620b296902/multidict-6.6.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a2088c126b6f72db6c9212ad827d0ba088c01d951cee25e758c450da732c138", size = 258299, upload-time = "2025-08-11T12:06:58.946Z" }, + { url = "https://files.pythonhosted.org/packages/3c/e8/8ca2e9a9f5a435fc6db40438a55730a4bf4956b554e487fa1b9ae920f825/multidict-6.6.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0f37bed7319b848097085d7d48116f545985db988e2256b2e6f00563a3416ee6", size = 242357, upload-time = 
"2025-08-11T12:07:00.301Z" }, + { url = "https://files.pythonhosted.org/packages/0f/84/80c77c99df05a75c28490b2af8f7cba2a12621186e0a8b0865d8e745c104/multidict-6.6.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:01368e3c94032ba6ca0b78e7ccb099643466cf24f8dc8eefcfdc0571d56e58f9", size = 268369, upload-time = "2025-08-11T12:07:01.638Z" }, + { url = "https://files.pythonhosted.org/packages/0d/e9/920bfa46c27b05fb3e1ad85121fd49f441492dca2449c5bcfe42e4565d8a/multidict-6.6.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fe323540c255db0bffee79ad7f048c909f2ab0edb87a597e1c17da6a54e493c", size = 269341, upload-time = "2025-08-11T12:07:02.943Z" }, + { url = "https://files.pythonhosted.org/packages/af/65/753a2d8b05daf496f4a9c367fe844e90a1b2cac78e2be2c844200d10cc4c/multidict-6.6.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8eb3025f17b0a4c3cd08cda49acf312a19ad6e8a4edd9dbd591e6506d999402", size = 256100, upload-time = "2025-08-11T12:07:04.564Z" }, + { url = "https://files.pythonhosted.org/packages/09/54/655be13ae324212bf0bc15d665a4e34844f34c206f78801be42f7a0a8aaa/multidict-6.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbc14f0365534d35a06970d6a83478b249752e922d662dc24d489af1aa0d1be7", size = 253584, upload-time = "2025-08-11T12:07:05.914Z" }, + { url = "https://files.pythonhosted.org/packages/5c/74/ab2039ecc05264b5cec73eb018ce417af3ebb384ae9c0e9ed42cb33f8151/multidict-6.6.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:75aa52fba2d96bf972e85451b99d8e19cc37ce26fd016f6d4aa60da9ab2b005f", size = 251018, upload-time = "2025-08-11T12:07:08.301Z" }, + { url = "https://files.pythonhosted.org/packages/af/0a/ccbb244ac848e56c6427f2392741c06302bbfba49c0042f1eb3c5b606497/multidict-6.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fefd4a815e362d4f011919d97d7b4a1e566f1dde83dc4ad8cfb5b41de1df68d", size = 
251477, upload-time = "2025-08-11T12:07:10.248Z" }, + { url = "https://files.pythonhosted.org/packages/0e/b0/0ed49bba775b135937f52fe13922bc64a7eaf0a3ead84a36e8e4e446e096/multidict-6.6.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:db9801fe021f59a5b375ab778973127ca0ac52429a26e2fd86aa9508f4d26eb7", size = 263575, upload-time = "2025-08-11T12:07:11.928Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d9/7fb85a85e14de2e44dfb6a24f03c41e2af8697a6df83daddb0e9b7569f73/multidict-6.6.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a650629970fa21ac1fb06ba25dabfc5b8a2054fcbf6ae97c758aa956b8dba802", size = 259649, upload-time = "2025-08-11T12:07:13.244Z" }, + { url = "https://files.pythonhosted.org/packages/03/9e/b3a459bcf9b6e74fa461a5222a10ff9b544cb1cd52fd482fb1b75ecda2a2/multidict-6.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:452ff5da78d4720d7516a3a2abd804957532dd69296cb77319c193e3ffb87e24", size = 251505, upload-time = "2025-08-11T12:07:14.57Z" }, + { url = "https://files.pythonhosted.org/packages/86/a2/8022f78f041dfe6d71e364001a5cf987c30edfc83c8a5fb7a3f0974cff39/multidict-6.6.4-cp312-cp312-win32.whl", hash = "sha256:8c2fcb12136530ed19572bbba61b407f655e3953ba669b96a35036a11a485793", size = 41888, upload-time = "2025-08-11T12:07:15.904Z" }, + { url = "https://files.pythonhosted.org/packages/c7/eb/d88b1780d43a56db2cba24289fa744a9d216c1a8546a0dc3956563fd53ea/multidict-6.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:047d9425860a8c9544fed1b9584f0c8bcd31bcde9568b047c5e567a1025ecd6e", size = 46072, upload-time = "2025-08-11T12:07:17.045Z" }, + { url = "https://files.pythonhosted.org/packages/9f/16/b929320bf5750e2d9d4931835a4c638a19d2494a5b519caaaa7492ebe105/multidict-6.6.4-cp312-cp312-win_arm64.whl", hash = "sha256:14754eb72feaa1e8ae528468f24250dd997b8e2188c3d2f593f9eba259e4b364", size = 43222, upload-time = "2025-08-11T12:07:18.328Z" }, + { url = 
"https://files.pythonhosted.org/packages/3a/5d/e1db626f64f60008320aab00fbe4f23fc3300d75892a3381275b3d284580/multidict-6.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f46a6e8597f9bd71b31cc708195d42b634c8527fecbcf93febf1052cacc1f16e", size = 75848, upload-time = "2025-08-11T12:07:19.912Z" }, + { url = "https://files.pythonhosted.org/packages/4c/aa/8b6f548d839b6c13887253af4e29c939af22a18591bfb5d0ee6f1931dae8/multidict-6.6.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:22e38b2bc176c5eb9c0a0e379f9d188ae4cd8b28c0f53b52bce7ab0a9e534657", size = 45060, upload-time = "2025-08-11T12:07:21.163Z" }, + { url = "https://files.pythonhosted.org/packages/eb/c6/f5e97e5d99a729bc2aa58eb3ebfa9f1e56a9b517cc38c60537c81834a73f/multidict-6.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5df8afd26f162da59e218ac0eefaa01b01b2e6cd606cffa46608f699539246da", size = 43269, upload-time = "2025-08-11T12:07:22.392Z" }, + { url = "https://files.pythonhosted.org/packages/dc/31/d54eb0c62516776f36fe67f84a732f97e0b0e12f98d5685bebcc6d396910/multidict-6.6.4-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:49517449b58d043023720aa58e62b2f74ce9b28f740a0b5d33971149553d72aa", size = 237158, upload-time = "2025-08-11T12:07:23.636Z" }, + { url = "https://files.pythonhosted.org/packages/c4/1c/8a10c1c25b23156e63b12165a929d8eb49a6ed769fdbefb06e6f07c1e50d/multidict-6.6.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae9408439537c5afdca05edd128a63f56a62680f4b3c234301055d7a2000220f", size = 257076, upload-time = "2025-08-11T12:07:25.049Z" }, + { url = "https://files.pythonhosted.org/packages/ad/86/90e20b5771d6805a119e483fd3d1e8393e745a11511aebca41f0da38c3e2/multidict-6.6.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:87a32d20759dc52a9e850fe1061b6e41ab28e2998d44168a8a341b99ded1dba0", size = 240694, upload-time = 
"2025-08-11T12:07:26.458Z" }, + { url = "https://files.pythonhosted.org/packages/e7/49/484d3e6b535bc0555b52a0a26ba86e4d8d03fd5587d4936dc59ba7583221/multidict-6.6.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52e3c8d43cdfff587ceedce9deb25e6ae77daba560b626e97a56ddcad3756879", size = 266350, upload-time = "2025-08-11T12:07:27.94Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b4/aa4c5c379b11895083d50021e229e90c408d7d875471cb3abf721e4670d6/multidict-6.6.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ad8850921d3a8d8ff6fbef790e773cecfc260bbfa0566998980d3fa8f520bc4a", size = 267250, upload-time = "2025-08-11T12:07:29.303Z" }, + { url = "https://files.pythonhosted.org/packages/80/e5/5e22c5bf96a64bdd43518b1834c6d95a4922cc2066b7d8e467dae9b6cee6/multidict-6.6.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:497a2954adc25c08daff36f795077f63ad33e13f19bfff7736e72c785391534f", size = 254900, upload-time = "2025-08-11T12:07:30.764Z" }, + { url = "https://files.pythonhosted.org/packages/17/38/58b27fed927c07035abc02befacab42491e7388ca105e087e6e0215ead64/multidict-6.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:024ce601f92d780ca1617ad4be5ac15b501cc2414970ffa2bb2bbc2bd5a68fa5", size = 252355, upload-time = "2025-08-11T12:07:32.205Z" }, + { url = "https://files.pythonhosted.org/packages/d0/a1/dad75d23a90c29c02b5d6f3d7c10ab36c3197613be5d07ec49c7791e186c/multidict-6.6.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a693fc5ed9bdd1c9e898013e0da4dcc640de7963a371c0bd458e50e046bf6438", size = 250061, upload-time = "2025-08-11T12:07:33.623Z" }, + { url = "https://files.pythonhosted.org/packages/b8/1a/ac2216b61c7f116edab6dc3378cca6c70dc019c9a457ff0d754067c58b20/multidict-6.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:190766dac95aab54cae5b152a56520fd99298f32a1266d66d27fdd1b5ac00f4e", size = 249675, 
upload-time = "2025-08-11T12:07:34.958Z" }, + { url = "https://files.pythonhosted.org/packages/d4/79/1916af833b800d13883e452e8e0977c065c4ee3ab7a26941fbfdebc11895/multidict-6.6.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:34d8f2a5ffdceab9dcd97c7a016deb2308531d5f0fced2bb0c9e1df45b3363d7", size = 261247, upload-time = "2025-08-11T12:07:36.588Z" }, + { url = "https://files.pythonhosted.org/packages/c5/65/d1f84fe08ac44a5fc7391cbc20a7cedc433ea616b266284413fd86062f8c/multidict-6.6.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:59e8d40ab1f5a8597abcef00d04845155a5693b5da00d2c93dbe88f2050f2812", size = 257960, upload-time = "2025-08-11T12:07:39.735Z" }, + { url = "https://files.pythonhosted.org/packages/13/b5/29ec78057d377b195ac2c5248c773703a6b602e132a763e20ec0457e7440/multidict-6.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:467fe64138cfac771f0e949b938c2e1ada2b5af22f39692aa9258715e9ea613a", size = 250078, upload-time = "2025-08-11T12:07:41.525Z" }, + { url = "https://files.pythonhosted.org/packages/c4/0e/7e79d38f70a872cae32e29b0d77024bef7834b0afb406ddae6558d9e2414/multidict-6.6.4-cp313-cp313-win32.whl", hash = "sha256:14616a30fe6d0a48d0a48d1a633ab3b8bec4cf293aac65f32ed116f620adfd69", size = 41708, upload-time = "2025-08-11T12:07:43.405Z" }, + { url = "https://files.pythonhosted.org/packages/9d/34/746696dffff742e97cd6a23da953e55d0ea51fa601fa2ff387b3edcfaa2c/multidict-6.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:40cd05eaeb39e2bc8939451f033e57feaa2ac99e07dbca8afe2be450a4a3b6cf", size = 45912, upload-time = "2025-08-11T12:07:45.082Z" }, + { url = "https://files.pythonhosted.org/packages/c7/87/3bac136181e271e29170d8d71929cdeddeb77f3e8b6a0c08da3a8e9da114/multidict-6.6.4-cp313-cp313-win_arm64.whl", hash = "sha256:f6eb37d511bfae9e13e82cb4d1af36b91150466f24d9b2b8a9785816deb16605", size = 43076, upload-time = "2025-08-11T12:07:46.746Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/94/0a8e63e36c049b571c9ae41ee301ada29c3fee9643d9c2548d7d558a1d99/multidict-6.6.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:6c84378acd4f37d1b507dfa0d459b449e2321b3ba5f2338f9b085cf7a7ba95eb", size = 82812, upload-time = "2025-08-11T12:07:48.402Z" }, + { url = "https://files.pythonhosted.org/packages/25/1a/be8e369dfcd260d2070a67e65dd3990dd635cbd735b98da31e00ea84cd4e/multidict-6.6.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0e0558693063c75f3d952abf645c78f3c5dfdd825a41d8c4d8156fc0b0da6e7e", size = 48313, upload-time = "2025-08-11T12:07:49.679Z" }, + { url = "https://files.pythonhosted.org/packages/26/5a/dd4ade298674b2f9a7b06a32c94ffbc0497354df8285f27317c66433ce3b/multidict-6.6.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3f8e2384cb83ebd23fd07e9eada8ba64afc4c759cd94817433ab8c81ee4b403f", size = 46777, upload-time = "2025-08-11T12:07:51.318Z" }, + { url = "https://files.pythonhosted.org/packages/89/db/98aa28bc7e071bfba611ac2ae803c24e96dd3a452b4118c587d3d872c64c/multidict-6.6.4-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f996b87b420995a9174b2a7c1a8daf7db4750be6848b03eb5e639674f7963773", size = 229321, upload-time = "2025-08-11T12:07:52.965Z" }, + { url = "https://files.pythonhosted.org/packages/c7/bc/01ddda2a73dd9d167bd85d0e8ef4293836a8f82b786c63fb1a429bc3e678/multidict-6.6.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc356250cffd6e78416cf5b40dc6a74f1edf3be8e834cf8862d9ed5265cf9b0e", size = 249954, upload-time = "2025-08-11T12:07:54.423Z" }, + { url = "https://files.pythonhosted.org/packages/06/78/6b7c0f020f9aa0acf66d0ab4eb9f08375bac9a50ff5e3edb1c4ccd59eafc/multidict-6.6.4-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:dadf95aa862714ea468a49ad1e09fe00fcc9ec67d122f6596a8d40caf6cec7d0", size = 228612, upload-time = 
"2025-08-11T12:07:55.914Z" }, + { url = "https://files.pythonhosted.org/packages/00/44/3faa416f89b2d5d76e9d447296a81521e1c832ad6e40b92f990697b43192/multidict-6.6.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7dd57515bebffd8ebd714d101d4c434063322e4fe24042e90ced41f18b6d3395", size = 257528, upload-time = "2025-08-11T12:07:57.371Z" }, + { url = "https://files.pythonhosted.org/packages/05/5f/77c03b89af0fcb16f018f668207768191fb9dcfb5e3361a5e706a11db2c9/multidict-6.6.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:967af5f238ebc2eb1da4e77af5492219fbd9b4b812347da39a7b5f5c72c0fa45", size = 256329, upload-time = "2025-08-11T12:07:58.844Z" }, + { url = "https://files.pythonhosted.org/packages/cf/e9/ed750a2a9afb4f8dc6f13dc5b67b514832101b95714f1211cd42e0aafc26/multidict-6.6.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a4c6875c37aae9794308ec43e3530e4aa0d36579ce38d89979bbf89582002bb", size = 247928, upload-time = "2025-08-11T12:08:01.037Z" }, + { url = "https://files.pythonhosted.org/packages/1f/b5/e0571bc13cda277db7e6e8a532791d4403dacc9850006cb66d2556e649c0/multidict-6.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f683a551e92bdb7fac545b9c6f9fa2aebdeefa61d607510b3533286fcab67f5", size = 245228, upload-time = "2025-08-11T12:08:02.96Z" }, + { url = "https://files.pythonhosted.org/packages/f3/a3/69a84b0eccb9824491f06368f5b86e72e4af54c3067c37c39099b6687109/multidict-6.6.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:3ba5aaf600edaf2a868a391779f7a85d93bed147854925f34edd24cc70a3e141", size = 235869, upload-time = "2025-08-11T12:08:04.746Z" }, + { url = "https://files.pythonhosted.org/packages/a9/9d/28802e8f9121a6a0804fa009debf4e753d0a59969ea9f70be5f5fdfcb18f/multidict-6.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:580b643b7fd2c295d83cad90d78419081f53fd532d1f1eb67ceb7060f61cff0d", size = 
243446, upload-time = "2025-08-11T12:08:06.332Z" }, + { url = "https://files.pythonhosted.org/packages/38/ea/6c98add069b4878c1d66428a5f5149ddb6d32b1f9836a826ac764b9940be/multidict-6.6.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:37b7187197da6af3ee0b044dbc9625afd0c885f2800815b228a0e70f9a7f473d", size = 252299, upload-time = "2025-08-11T12:08:07.931Z" }, + { url = "https://files.pythonhosted.org/packages/3a/09/8fe02d204473e14c0af3affd50af9078839dfca1742f025cca765435d6b4/multidict-6.6.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e1b93790ed0bc26feb72e2f08299691ceb6da5e9e14a0d13cc74f1869af327a0", size = 246926, upload-time = "2025-08-11T12:08:09.467Z" }, + { url = "https://files.pythonhosted.org/packages/37/3d/7b1e10d774a6df5175ecd3c92bff069e77bed9ec2a927fdd4ff5fe182f67/multidict-6.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a506a77ddee1efcca81ecbeae27ade3e09cdf21a8ae854d766c2bb4f14053f92", size = 243383, upload-time = "2025-08-11T12:08:10.981Z" }, + { url = "https://files.pythonhosted.org/packages/50/b0/a6fae46071b645ae98786ab738447de1ef53742eaad949f27e960864bb49/multidict-6.6.4-cp313-cp313t-win32.whl", hash = "sha256:f93b2b2279883d1d0a9e1bd01f312d6fc315c5e4c1f09e112e4736e2f650bc4e", size = 47775, upload-time = "2025-08-11T12:08:12.439Z" }, + { url = "https://files.pythonhosted.org/packages/b2/0a/2436550b1520091af0600dff547913cb2d66fbac27a8c33bc1b1bccd8d98/multidict-6.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:6d46a180acdf6e87cc41dc15d8f5c2986e1e8739dc25dbb7dac826731ef381a4", size = 53100, upload-time = "2025-08-11T12:08:13.823Z" }, + { url = "https://files.pythonhosted.org/packages/97/ea/43ac51faff934086db9c072a94d327d71b7d8b40cd5dcb47311330929ef0/multidict-6.6.4-cp313-cp313t-win_arm64.whl", hash = "sha256:756989334015e3335d087a27331659820d53ba432befdef6a718398b0a8493ad", size = 45501, upload-time = "2025-08-11T12:08:15.173Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/69/b547032297c7e63ba2af494edba695d781af8a0c6e89e4d06cf848b21d80/multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c", size = 12313, upload-time = "2025-08-11T12:08:46.891Z" }, ] [[package]] @@ -2411,15 +3052,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "myst-parser" version = "4.0.1" @@ -2439,11 +3071,130 @@ wheels = [ [[package]] name = "narwhals" -version = "1.44.0" +version = "2.1.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/e5/0b875d29e2a4d112c58fef6aac2ed3a73bbdd4d8d0dce722fd154357248a/narwhals-1.44.0.tar.gz", hash = "sha256:8cf0616d4f6f21225b3b56fcde96ccab6d05023561a0f162402aa9b8c33ad31d", size = 499250, upload-time = "2025-06-23T08:28:08.653Z" } +sdist = { url = "https://files.pythonhosted.org/packages/37/f0/b0550d9b84759f4d045fd43da2f811e8b23dc2001e38c3254456da7f3adb/narwhals-2.1.2.tar.gz", hash 
= "sha256:afb9597e76d5b38c2c4b7c37d27a2418b8cc8049a66b8a5aca9581c92ae8f8bf", size = 533772, upload-time = "2025-08-15T08:24:50.916Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ff/fb/12f4a971467aac3cb7cbccbbfca5d0f05e23722068112c1ac4a393613ebe/narwhals-1.44.0-py3-none-any.whl", hash = "sha256:a170ea0bab4cf1f323d9f8bf17f2d7042c3d73802bea321996b39bf075d57de5", size = 365240, upload-time = "2025-06-23T08:28:06.314Z" }, + { url = "https://files.pythonhosted.org/packages/a8/01/824fff6789ce92a53242d24b6f5f3a982df2f610c51020f934bf878d2a99/narwhals-2.1.2-py3-none-any.whl", hash = "sha256:136b2f533a4eb3245c54254f137c5d14cef5c4668cff67dc6e911a602acd3547", size = 392064, upload-time = "2025-08-15T08:24:48.788Z" }, +] + +[[package]] +name = "nemo-automodel" +source = { editable = "3rdparty/Automodel-workspace/Automodel" } +dependencies = [ + { name = "bitsandbytes", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" }, + { name = "datasets" }, + { name = "liger-kernel", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'" }, + { name = "megatron-fsdp" }, + { name = "pybind11" }, + { name = "pyyaml" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "torchao" }, + { name = "torchdata" }, + { name = "transformers" }, + { name = "wandb" }, +] + +[package.optional-dependencies] +fa = [ + { name = "flash-attn" }, +] +moe = [ + { name = "transformer-engine", extra = ["pytorch"] }, +] +vlm = [ + { name = "backoff" }, + { name = "mistral-common", extra = ["opencv"] }, + { name = "numba" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "qwen-vl-utils", extra = ["decord"] }, + { name = "timm" }, + { name = "torchcodec" }, + { name = "transformers" }, +] + +[package.dev-dependencies] 
+build = [ + { name = "setuptools" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] +dev = [ + { name = "cut-cross-entropy" }, +] +docs = [ + { name = "myst-parser" }, + { name = "nvidia-sphinx-theme" }, + { name = "sphinx" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc2" }, + { name = "sphinx-copybutton" }, +] +linting = [ + { name = "import-linter" }, + { name = "pre-commit" }, + { name = "ruff" }, +] +test = [ + { name = "coverage" }, + { name = "peft" }, + { name = "pytest" }, +] + +[package.metadata] +requires-dist = [ + { name = "backoff", marker = "extra == 'vlm'" }, + { name = "bitsandbytes", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = "==0.45.5" }, + { name = "datasets", specifier = ">=4.0.0" }, + { name = "flash-attn", marker = "extra == 'fa'", specifier = "<=2.8.3" }, + { name = "liger-kernel", marker = "platform_machine == 'x86_64' and sys_platform != 'darwin'", specifier = ">=0.5.9" }, + { name = "megatron-fsdp" }, + { name = "mistral-common", extras = ["opencv"], marker = "extra == 'vlm'" }, + { name = "numba", marker = "extra == 'vlm'" }, + { name = "numpy", marker = "extra == 'vlm'" }, + { name = "pillow", marker = "extra == 'vlm'" }, + { name = "pybind11" }, + { name = "pyyaml" }, + { name = "qwen-vl-utils", extras = ["decord"], marker = "extra == 'vlm'" }, + { name = "timm", marker = "extra == 'vlm'", specifier = "==1.0.16" }, + { name = "torch", marker = "sys_platform != 'darwin'", specifier = "<=2.8.0", index = "https://download.pytorch.org/whl/cu129" }, + { name = "torch", marker = "sys_platform == 'darwin'", specifier = "<=2.8.0", index = "https://pypi.org/simple" }, + { name = "torchao" }, + { name = "torchcodec", marker = "extra == 'vlm'" }, + { name = 
"torchdata" }, + { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'moe'", specifier = "==2.8.0" }, + { name = "transformers", specifier = "<=4.55.4" }, + { name = "transformers", marker = "extra == 'vlm'", specifier = "<=4.55.4" }, + { name = "wandb" }, +] +provides-extras = ["vlm", "fa", "moe"] + +[package.metadata.requires-dev] +build = [ + { name = "setuptools" }, + { name = "torch", marker = "sys_platform != 'darwin'", specifier = "<=2.8.0", index = "https://download.pytorch.org/whl/cu129" }, + { name = "torch", marker = "sys_platform == 'darwin'", specifier = "<=2.8.0", index = "https://pypi.org/simple" }, +] +dev = [{ name = "cut-cross-entropy", git = "https://github.com/apple/ml-cross-entropy.git?rev=87a86ab" }] +docs = [ + { name = "myst-parser" }, + { name = "nvidia-sphinx-theme" }, + { name = "sphinx" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc2" }, + { name = "sphinx-copybutton" }, +] +linting = [ + { name = "import-linter", specifier = "~=2.4" }, + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "ruff", specifier = "~=0.9.0" }, +] +test = [ + { name = "coverage" }, + { name = "peft" }, + { name = "pytest" }, ] [[package]] @@ -2460,35 +3211,59 @@ dependencies = [ { name = "matplotlib" }, { name = "mlflow" }, { name = "ninja" }, + { name = "num2words" }, { name = "numpy" }, { name = "nvidia-ml-py" }, + { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "nvtx" }, { name = "omegaconf" }, + { name = "pillow" }, { name = "plotly" }, + { name = "pyzmq" }, { name = "ray", extra = ["default"] }, { name = "rich" }, { name = "setuptools" }, + { name = "swanlab" }, + { name = "sympy" }, { name = "tensorboard" }, { name = "tiktoken" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { 
name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, { name = "torchdata" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "transformers" }, - { name = "triton" }, + { name = "triton", version = "3.4.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "wandb" }, ] [package.optional-dependencies] automodel = [ + { name = "causal-conv1d" }, { name = "flash-attn" }, + { name = "mamba-ssm" }, + { name = "nemo-automodel" }, + { name = "vllm" }, ] mcore = [ { name = "flash-attn" }, + { name = "megatron-bridge" }, { name = "megatron-core" }, - { name = "nemo-tron" }, { name = "transformer-engine", extra = ["pytorch"] }, + { name = "vllm" }, +] +penguin = [ + { name = "penguin" }, ] vllm = [ + { name = "causal-conv1d" }, + { name = "cuda-python" }, + { name = "deep-ep" }, + { name = "deep-gemm" }, { name = "flash-attn" }, + { name = "mamba-ssm" }, + { name = "num2words" }, { name = "vllm" }, ] @@ -2500,7 +3275,8 @@ build = [ { name = "psutil" }, { name = "pybind11" }, { name = "setuptools" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { 
registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, ] dev = [ { name = "pre-commit" }, @@ -2510,17 +3286,23 @@ dev = [ { name = "types-requests" }, ] docs = [ + { name = "gitpython" }, { name = "myst-parser" }, { name = "nvidia-sphinx-theme" }, + { name = "python-dotenv" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, + { name = "sphinxcontrib-mermaid" }, + { name = "swagger-plugin-for-sphinx" }, ] test = [ { name = "pytest" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "pytest-timeout" }, ] @@ -2528,38 +3310,59 @@ test = [ requires-dist = [ { name = "accelerate", specifier = ">=0.26" }, { name = "blobfile" }, + { name = "causal-conv1d", marker = "extra == 'automodel'", git = "https://github.com/Dao-AILab/causal-conv1d?tag=v1.5.0.post8" }, + { name = "causal-conv1d", marker = "extra == 'vllm'", git = "https://github.com/Dao-AILab/causal-conv1d?tag=v1.5.0.post8" }, { name = "colored", specifier = "==2.2.3" }, + { name = "cuda-python", marker = "extra == 'vllm'" }, { name = "datasets", specifier = ">=4.0.0" }, { name = "debugpy" }, - { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.7.4.post1" }, - { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.7.4.post1" }, - { name = "flash-attn", marker = "extra == 'vllm'", specifier = "==2.7.4.post1" }, + { name = "deep-ep", marker = "extra == 'vllm'", git = "https://github.com/deepseek-ai/DeepEP.git?rev=e3908bf5bd0cc6265bcb225d15cd8c996d4759ef" }, + { name = "deep-gemm", marker = "extra == 'vllm'", git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c" }, + { name = "flash-attn", marker = "extra == 'automodel'", specifier = "==2.8.1" }, + { name = "flash-attn", marker = "extra == 'mcore'", specifier = "==2.8.1" }, + { name = "flash-attn", marker = "extra 
== 'vllm'", specifier = "==2.8.1" }, { name = "hydra-core" }, + { name = "mamba-ssm", marker = "extra == 'automodel'", git = "https://github.com/state-spaces/mamba.git?rev=2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" }, + { name = "mamba-ssm", marker = "extra == 'vllm'", git = "https://github.com/state-spaces/mamba.git?rev=2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" }, { name = "math-verify" }, { name = "matplotlib" }, + { name = "megatron-bridge", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-Bridge-workspace" }, { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, - { name = "mlflow" }, - { name = "nemo-tron", marker = "extra == 'mcore'", editable = "3rdparty/NeMo-workspace" }, + { name = "mlflow", specifier = ">=3.5.0,<3.6.0" }, + { name = "nemo-automodel", marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" }, { name = "ninja" }, + { name = "num2words", specifier = ">=0.5.14" }, + { name = "num2words", marker = "extra == 'vllm'", specifier = ">=0.5.14" }, { name = "numpy" }, { name = "nvidia-ml-py" }, + { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "nvtx" }, { name = "omegaconf" }, + { name = "penguin", marker = "extra == 'penguin'", editable = "3rdparty/Penguin-workspace" }, + { name = "pillow", specifier = ">=11.3.0" }, { name = "plotly" }, - { name = "ray", extras = ["default"], specifier = "==2.46.0" }, + { name = "pyzmq" }, + { name = "ray", extras = ["default"], specifier = "==2.49.2" }, { name = "rich" }, { name = "setuptools" }, + { name = "swanlab" }, + { name = "sympy", specifier = ">=1.14.0" }, { name = "tensorboard" }, { name = "tiktoken" }, - { name = "torch", specifier = "==2.7.0", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.8.0", index = 
"https://download.pytorch.org/whl/cu129" }, + { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.8.0", index = "https://pypi.org/simple" }, { name = "torchdata" }, - { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'mcore'", specifier = "==2.3.0" }, - { name = "transformers", specifier = ">=4.51.0" }, - { name = "triton", index = "https://download.pytorch.org/whl/cu128" }, - { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.9.0" }, + { name = "torchvision", marker = "sys_platform != 'darwin'", specifier = ">=0.22.0", index = "https://download.pytorch.org/whl/cu129" }, + { name = "torchvision", marker = "sys_platform == 'darwin'", specifier = ">=0.22.0", index = "https://pypi.org/simple" }, + { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'mcore'", specifier = "==2.8.0" }, + { name = "transformers", specifier = ">=4.55.4" }, + { name = "triton", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')", index = "https://download.pytorch.org/whl/cu129" }, + { name = "vllm", marker = "extra == 'automodel'", specifier = "==0.11.0" }, + { name = "vllm", marker = "extra == 'mcore'", specifier = "==0.11.0" }, + { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.11.0" }, { name = "wandb" }, ] -provides-extras = ["automodel", "vllm", "mcore"] +provides-extras = ["automodel", "vllm", "mcore", "penguin"] [package.metadata.requires-dev] build = [ @@ -2569,95 +3372,37 @@ build = [ { name = "psutil" }, { name = "pybind11" }, { name = "setuptools" }, - { name = "torch", specifier = "==2.7.0", index = "https://download.pytorch.org/whl/cu128" }, + { name = "torch", marker = "sys_platform != 'darwin'", specifier = "==2.8.0", index = "https://download.pytorch.org/whl/cu129" }, + { name = "torch", marker = "sys_platform == 'darwin'", specifier = "==2.8.0", index = "https://pypi.org/simple" }, ] dev = [ - { name = 
"pre-commit", specifier = "==3.6.0" }, + { name = "pre-commit", specifier = ">=4.2.0" }, { name = "pyrefly", specifier = "==0.24.2" }, { name = "ruff", specifier = "==0.9.9" }, { name = "types-pyyaml" }, { name = "types-requests" }, ] docs = [ + { name = "gitpython", specifier = ">=3.1.45" }, { name = "myst-parser" }, { name = "nvidia-sphinx-theme" }, + { name = "python-dotenv" }, { name = "sphinx" }, { name = "sphinx-autobuild" }, { name = "sphinx-autodoc2" }, { name = "sphinx-copybutton" }, + { name = "sphinx-design" }, + { name = "sphinxcontrib-mermaid" }, + { name = "swagger-plugin-for-sphinx" }, ] test = [ { name = "pytest", specifier = ">=7.0.0" }, { name = "pytest-asyncio" }, { name = "pytest-cov" }, + { name = "pytest-testmon" }, { name = "pytest-timeout" }, ] -[[package]] -name = "nemo-run" -version = "0.5.0rc0.dev0" -source = { git = "https://github.com/NVIDIA-NeMo/Run?rev=414f0077c648fde2c71bb1186e97ccbf96d6844c#414f0077c648fde2c71bb1186e97ccbf96d6844c" } -dependencies = [ - { name = "catalogue" }, - { name = "cryptography" }, - { name = "fabric" }, - { name = "fiddle" }, - { name = "inquirerpy" }, - { name = "jinja2" }, - { name = "networkx" }, - { name = "omegaconf" }, - { name = "packaging" }, - { name = "rich" }, - { name = "toml" }, - { name = "torchx" }, - { name = "typer" }, -] - -[[package]] -name = "nemo-tron" -source = { editable = "3rdparty/NeMo-workspace" } -dependencies = [ - { name = "braceexpand" }, - { name = "cloudpickle" }, - { name = "fiddle" }, - { name = "h5py" }, - { name = "hatchling" }, - { name = "ijson" }, - { name = "lightning" }, - { name = "matplotlib" }, - { name = "nemo-run" }, - { name = "onnx" }, - { name = "scikit-learn" }, - { name = "webdataset" }, - { name = "wget" }, -] - -[package.metadata] -requires-dist = [ - { name = "braceexpand" }, - { name = "cloudpickle" }, - { name = "fiddle" }, - { name = "h5py" }, - { name = "hatchling" }, - { name = "ijson" }, - { name = "lightning" }, - { name = "matplotlib" }, - { name 
= "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run?rev=414f0077c648fde2c71bb1186e97ccbf96d6844c" }, - { name = "onnx" }, - { name = "scikit-learn" }, - { name = "webdataset" }, - { name = "wget" }, -] - -[[package]] -name = "nest-asyncio" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/83/f8/51569ac65d696c8ecbee95938f89d4abf00f47d58d48f6fbabfe8f0baefe/nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe", size = 7418, upload-time = "2024-01-21T14:25:19.227Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/c4/c2971a3ba4c6103a3d10c4b0f24f461ddc027f0f09763220cf35ca1401b3/nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c", size = 5195, upload-time = "2024-01-21T14:25:17.223Z" }, -] - [[package]] name = "networkx" version = "3.5" @@ -2669,26 +3414,28 @@ wheels = [ [[package]] name = "ninja" -version = "1.11.1.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/95/d4/6b0324541018561c5e73e617bd16f20a4fc17d1179bb3b3520b6ca8beb7b/ninja-1.11.1.4.tar.gz", hash = "sha256:6aa39f6e894e0452e5b297327db00019383ae55d5d9c57c73b04f13bf79d438a", size = 201256, upload-time = "2025-03-22T06:46:43.46Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4f/b1/3a61b348936b62a386465b1937cd778fa3a5748582e26d832dbab844ff27/ninja-1.11.1.4-py3-none-macosx_10_9_universal2.whl", hash = "sha256:b33923c8da88e8da20b6053e38deb433f53656441614207e01d283ad02c5e8e7", size = 279071, upload-time = "2025-03-22T06:46:17.806Z" }, - { url = "https://files.pythonhosted.org/packages/12/42/4c94fdad51fcf1f039a156e97de9e4d564c2a8cc0303782d36f9bd893a4b/ninja-1.11.1.4-py3-none-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:cede0af00b58e27b31f2482ba83292a8e9171cdb9acc2c867a3b6e40b3353e43", size = 472026, 
upload-time = "2025-03-22T06:46:19.974Z" }, - { url = "https://files.pythonhosted.org/packages/eb/7a/455d2877fe6cf99886849c7f9755d897df32eaf3a0fba47b56e615f880f7/ninja-1.11.1.4-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:096487995473320de7f65d622c3f1d16c3ad174797602218ca8c967f51ec38a0", size = 422814, upload-time = "2025-03-22T06:46:21.235Z" }, - { url = "https://files.pythonhosted.org/packages/e3/ad/fb6cca942528e25e8e0ab0f0cf98fe007319bf05cf69d726c564b815c4af/ninja-1.11.1.4-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3090d4488fadf6047d0d7a1db0c9643a8d391f0d94729554dbb89b5bdc769d7", size = 156965, upload-time = "2025-03-22T06:46:23.45Z" }, - { url = "https://files.pythonhosted.org/packages/a8/e7/d94a1b60031b115dd88526834b3da69eaacdc3c1a6769773ca8e2b1386b5/ninja-1.11.1.4-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ecce44a00325a93631792974659cf253a815cc6da4ec96f89742925dfc295a0d", size = 179937, upload-time = "2025-03-22T06:46:24.728Z" }, - { url = "https://files.pythonhosted.org/packages/08/cc/e9316a28235409e9363794fc3d0b3083e48dd80d441006de66421e55f364/ninja-1.11.1.4-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c29bb66d2aa46a2409ab369ea804c730faec7652e8c22c1e428cc09216543e5", size = 157020, upload-time = "2025-03-22T06:46:26.046Z" }, - { url = "https://files.pythonhosted.org/packages/e3/30/389b22300541aa5f2e9dad322c4de2f84be4e32aa4e8babd9160d620b5f1/ninja-1.11.1.4-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:055f386fb550c2c9d6157e45e20a84d29c47968876b9c5794ae2aec46f952306", size = 130389, upload-time = "2025-03-22T06:46:27.174Z" }, - { url = "https://files.pythonhosted.org/packages/a9/10/e27f35cb92813aabbb7ae771b1685b45be1cc8a0798ce7d4bfd08d142b93/ninja-1.11.1.4-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:f6186d7607bb090c3be1e10c8a56b690be238f953616626f5032238c66e56867", size = 372435, upload-time = 
"2025-03-22T06:46:28.637Z" }, - { url = "https://files.pythonhosted.org/packages/c2/26/e3559619756739aae124c6abf7fe41f7e546ab1209cfbffb13137bff2d2e/ninja-1.11.1.4-py3-none-musllinux_1_1_i686.whl", hash = "sha256:cf4453679d15babc04ba023d68d091bb613091b67101c88f85d2171c6621c6eb", size = 419300, upload-time = "2025-03-22T06:46:30.392Z" }, - { url = "https://files.pythonhosted.org/packages/35/46/809e4e9572570991b8e6f88f3583807d017371ab4cb09171cbc72a7eb3e4/ninja-1.11.1.4-py3-none-musllinux_1_1_ppc64le.whl", hash = "sha256:d4a6f159b08b0ac4aca5ee1572e3e402f969139e71d85d37c0e2872129098749", size = 420239, upload-time = "2025-03-22T06:46:32.442Z" }, - { url = "https://files.pythonhosted.org/packages/e6/64/5cb5710d15f844edf02ada577f8eddfdcd116f47eec15850f3371a3a4b33/ninja-1.11.1.4-py3-none-musllinux_1_1_s390x.whl", hash = "sha256:c3b96bd875f3ef1db782470e9e41d7508905a0986571f219d20ffed238befa15", size = 415986, upload-time = "2025-03-22T06:46:33.821Z" }, - { url = "https://files.pythonhosted.org/packages/95/b2/0e9ab1d926f423b12b09925f78afcc5e48b3c22e7121be3ddf6c35bf06a3/ninja-1.11.1.4-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:cf554e73f72c04deb04d0cf51f5fdb1903d9c9ca3d2344249c8ce3bd616ebc02", size = 379657, upload-time = "2025-03-22T06:46:36.166Z" }, - { url = "https://files.pythonhosted.org/packages/c8/3e/fd6d330d0434168e7fe070d414b57dd99c4c133faa69c05b42a3cbdc6c13/ninja-1.11.1.4-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:cfdd09776436a1ff3c4a2558d3fc50a689fb9d7f1bdbc3e6f7b8c2991341ddb3", size = 454466, upload-time = "2025-03-22T06:46:37.413Z" }, - { url = "https://files.pythonhosted.org/packages/e6/df/a25f3ad0b1c59d1b90564096e4fd89a6ca30d562b1e942f23880c3000b89/ninja-1.11.1.4-py3-none-win32.whl", hash = "sha256:2ab67a41c90bea5ec4b795bab084bc0b3b3bb69d3cd21ca0294fc0fc15a111eb", size = 255931, upload-time = "2025-03-22T06:46:39.171Z" }, - { url = 
"https://files.pythonhosted.org/packages/5b/10/9b8fe9ac004847490cc7b54896124c01ce2d87d95dc60aabd0b8591addff/ninja-1.11.1.4-py3-none-win_amd64.whl", hash = "sha256:4617b3c12ff64b611a7d93fd9e378275512bb36eff8babff7c83f5116b4f8d66", size = 296461, upload-time = "2025-03-22T06:46:40.532Z" }, - { url = "https://files.pythonhosted.org/packages/b9/58/612a17593c2d117f96c7f6b7f1e6570246bddc4b1e808519403a1417f217/ninja-1.11.1.4-py3-none-win_arm64.whl", hash = "sha256:5713cf50c5be50084a8693308a63ecf9e55c3132a78a41ab1363a28b6caaaee1", size = 271441, upload-time = "2025-03-22T06:46:42.147Z" }, +version = "1.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/73/79a0b22fc731989c708068427579e840a6cf4e937fe7ae5c5d0b7356ac22/ninja-1.13.0.tar.gz", hash = "sha256:4a40ce995ded54d9dc24f8ea37ff3bf62ad192b547f6c7126e7e25045e76f978", size = 242558, upload-time = "2025-08-11T15:10:19.421Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/74/d02409ed2aa865e051b7edda22ad416a39d81a84980f544f8de717cab133/ninja-1.13.0-py3-none-macosx_10_9_universal2.whl", hash = "sha256:fa2a8bfc62e31b08f83127d1613d10821775a0eb334197154c4d6067b7068ff1", size = 310125, upload-time = "2025-08-11T15:09:50.971Z" }, + { url = "https://files.pythonhosted.org/packages/8e/de/6e1cd6b84b412ac1ef327b76f0641aeb5dcc01e9d3f9eee0286d0c34fd93/ninja-1.13.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3d00c692fb717fd511abeb44b8c5d00340c36938c12d6538ba989fe764e79630", size = 177467, upload-time = "2025-08-11T15:09:52.767Z" }, + { url = "https://files.pythonhosted.org/packages/c8/83/49320fb6e58ae3c079381e333575fdbcf1cca3506ee160a2dcce775046fa/ninja-1.13.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:be7f478ff9f96a128b599a964fc60a6a87b9fa332ee1bd44fa243ac88d50291c", size = 187834, upload-time = "2025-08-11T15:09:54.115Z" }, + { url = 
"https://files.pythonhosted.org/packages/56/c7/ba22748fb59f7f896b609cd3e568d28a0a367a6d953c24c461fe04fc4433/ninja-1.13.0-py3-none-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:60056592cf495e9a6a4bea3cd178903056ecb0943e4de45a2ea825edb6dc8d3e", size = 202736, upload-time = "2025-08-11T15:09:55.745Z" }, + { url = "https://files.pythonhosted.org/packages/79/22/d1de07632b78ac8e6b785f41fa9aad7a978ec8c0a1bf15772def36d77aac/ninja-1.13.0-py3-none-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:1c97223cdda0417f414bf864cfb73b72d8777e57ebb279c5f6de368de0062988", size = 179034, upload-time = "2025-08-11T15:09:57.394Z" }, + { url = "https://files.pythonhosted.org/packages/ed/de/0e6edf44d6a04dabd0318a519125ed0415ce437ad5a1ec9b9be03d9048cf/ninja-1.13.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fb46acf6b93b8dd0322adc3a4945452a4e774b75b91293bafcc7b7f8e6517dfa", size = 180716, upload-time = "2025-08-11T15:09:58.696Z" }, + { url = "https://files.pythonhosted.org/packages/54/28/938b562f9057aaa4d6bfbeaa05e81899a47aebb3ba6751e36c027a7f5ff7/ninja-1.13.0-py3-none-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4be9c1b082d244b1ad7ef41eb8ab088aae8c109a9f3f0b3e56a252d3e00f42c1", size = 146843, upload-time = "2025-08-11T15:10:00.046Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fb/d06a3838de4f8ab866e44ee52a797b5491df823901c54943b2adb0389fbb/ninja-1.13.0-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:6739d3352073341ad284246f81339a384eec091d9851a886dfa5b00a6d48b3e2", size = 154402, upload-time = "2025-08-11T15:10:01.657Z" }, + { url = "https://files.pythonhosted.org/packages/31/bf/0d7808af695ceddc763cf251b84a9892cd7f51622dc8b4c89d5012779f06/ninja-1.13.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:11be2d22027bde06f14c343f01d31446747dbb51e72d00decca2eb99be911e2f", size = 552388, upload-time = "2025-08-11T15:10:03.349Z" }, + { url = 
"https://files.pythonhosted.org/packages/9d/70/c99d0c2c809f992752453cce312848abb3b1607e56d4cd1b6cded317351a/ninja-1.13.0-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:aa45b4037b313c2f698bc13306239b8b93b4680eb47e287773156ac9e9304714", size = 472501, upload-time = "2025-08-11T15:10:04.735Z" }, + { url = "https://files.pythonhosted.org/packages/9f/43/c217b1153f0e499652f5e0766da8523ce3480f0a951039c7af115e224d55/ninja-1.13.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5f8e1e8a1a30835eeb51db05cf5a67151ad37542f5a4af2a438e9490915e5b72", size = 638280, upload-time = "2025-08-11T15:10:06.512Z" }, + { url = "https://files.pythonhosted.org/packages/8c/45/9151bba2c8d0ae2b6260f71696330590de5850e5574b7b5694dce6023e20/ninja-1.13.0-py3-none-musllinux_1_2_ppc64le.whl", hash = "sha256:3d7d7779d12cb20c6d054c61b702139fd23a7a964ec8f2c823f1ab1b084150db", size = 642420, upload-time = "2025-08-11T15:10:08.35Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/95752eb635bb8ad27d101d71bef15bc63049de23f299e312878fc21cb2da/ninja-1.13.0-py3-none-musllinux_1_2_riscv64.whl", hash = "sha256:d741a5e6754e0bda767e3274a0f0deeef4807f1fec6c0d7921a0244018926ae5", size = 585106, upload-time = "2025-08-11T15:10:09.818Z" }, + { url = "https://files.pythonhosted.org/packages/c1/31/aa56a1a286703800c0cbe39fb4e82811c277772dc8cd084f442dd8e2938a/ninja-1.13.0-py3-none-musllinux_1_2_s390x.whl", hash = "sha256:e8bad11f8a00b64137e9b315b137d8bb6cbf3086fbdc43bf1f90fd33324d2e96", size = 707138, upload-time = "2025-08-11T15:10:11.366Z" }, + { url = "https://files.pythonhosted.org/packages/34/6f/5f5a54a1041af945130abdb2b8529cbef0cdcbbf9bcf3f4195378319d29a/ninja-1.13.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4f2a072db3c0f944c32793e91532d8948d20d9ab83da9c0c7c15b5768072200", size = 581758, upload-time = "2025-08-11T15:10:13.295Z" }, + { url = "https://files.pythonhosted.org/packages/95/97/51359c77527d45943fe7a94d00a3843b81162e6c4244b3579fe8fc54cb9c/ninja-1.13.0-py3-none-win32.whl", hash = 
"sha256:8cfbb80b4a53456ae8a39f90ae3d7a2129f45ea164f43fadfa15dc38c4aef1c9", size = 267201, upload-time = "2025-08-11T15:10:15.158Z" }, + { url = "https://files.pythonhosted.org/packages/29/45/c0adfbfb0b5895aa18cec400c535b4f7ff3e52536e0403602fc1a23f7de9/ninja-1.13.0-py3-none-win_amd64.whl", hash = "sha256:fb8ee8719f8af47fed145cced4a85f0755dd55d45b2bddaf7431fa89803c5f3e", size = 309975, upload-time = "2025-08-11T15:10:16.697Z" }, + { url = "https://files.pythonhosted.org/packages/df/93/a7b983643d1253bb223234b5b226e69de6cda02b76cdca7770f684b795f5/ninja-1.13.0-py3-none-win_arm64.whl", hash = "sha256:3c0b40b1f0bba764644385319028650087b4c1b18cdfa6f45cb39a3669b81aa9", size = 290806, upload-time = "2025-08-11T15:10:18.018Z" }, ] [[package]] @@ -2715,6 +3462,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, ] +[[package]] +name = "num2words" +version = "0.5.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docopt" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/58/ad645bd38b4b648eb2fc2ba1b909398e54eb0cbb6a7dbd2b4953e38c9621/num2words-0.5.14.tar.gz", hash = "sha256:b066ec18e56b6616a3b38086b5747daafbaa8868b226a36127e0451c0cf379c6", size = 218213, upload-time = "2024-12-17T20:17:10.191Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d6/5b/545e9267a1cc080c8a1be2746113a063e34bcdd0f5173fd665a5c13cb234/num2words-0.5.14-py3-none-any.whl", hash = "sha256:1c8e5b00142fc2966fd8d685001e36c4a9911e070d1b120e1beb721fa1edb33d", size = 163525, upload-time = "2024-12-17T20:17:06.074Z" }, +] + [[package]] name = "numba" version = "0.61.2" @@ -2739,24 +3498,24 @@ wheels = [ [[package]] name = "numcodecs" -version = "0.16.1" +version = "0.16.2" source = { registry = 
"https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/35/49da850ce5371da3930d099da364a73ce9ae4fc64075e521674b48f4804d/numcodecs-0.16.1.tar.gz", hash = "sha256:c47f20d656454568c6b4697ce02081e6bbb512f198738c6a56fafe8029c97fb1", size = 6268134, upload-time = "2025-05-22T13:33:04.098Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/1d/837d946aab385abe1e472ec08a6816d84b00f4ceeae5445eb8f25c5c6ca9/numcodecs-0.16.2.tar.gz", hash = "sha256:9922dae0c3b01b5bed3b4bae239f4787e891daa3262c27971298669d029d10e9", size = 6271668, upload-time = "2025-08-13T16:09:26.125Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ee/e2a903c88fed347dc74c70bbd7a8dab9aa22bb0dac68c5bc6393c2e9373b/numcodecs-0.16.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1abe0651ecb6f207656ebfc802effa55c4ae3136cf172c295a067749a2699122", size = 1663434, upload-time = "2025-05-22T13:32:47.26Z" }, - { url = "https://files.pythonhosted.org/packages/f2/f0/37819d4f6896b1ac43a164ffd3ab99d7cbf63bf63cb375fef97aedaef4f0/numcodecs-0.16.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:abb39b7102d0816c8563669cdddca40392d34d0cbf31e3e996706b244586a458", size = 1150402, upload-time = "2025-05-22T13:32:48.574Z" }, - { url = "https://files.pythonhosted.org/packages/60/3c/5059a29750305b80b7428b1e6695878dea9ea3b537d7fba57875e4bbc2c7/numcodecs-0.16.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3359a951f8b23317f12736a7ad1e7375ec3d735465f92049c76d032ebca4c40", size = 8237455, upload-time = "2025-05-22T13:32:50.052Z" }, - { url = "https://files.pythonhosted.org/packages/1b/f5/515f98d659ab0cbe3738da153eddae22186fd38f05a808511e10f04cf679/numcodecs-0.16.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82cc70592ec18060786b1bfa0da23afd2a7807d7975d766e626954d6628ec609", size = 8770711, upload-time = "2025-05-22T13:32:52.198Z" }, 
- { url = "https://files.pythonhosted.org/packages/a2/3a/9fc6104f888af11bad804ebd32dffe0bcb83337f4525b4fe5b379942fefd/numcodecs-0.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:4b48ddc8a7d132b7808bc53eb2705342de5c1e39289d725f988bd143c0fd86df", size = 788701, upload-time = "2025-05-22T13:32:54.28Z" }, - { url = "https://files.pythonhosted.org/packages/5e/1e/73ffb1074f03d52cb1c4f4deaba26a2008ca45262f3622ed26dbec7a7362/numcodecs-0.16.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ad8ee940315f59188accfc3f2d39726a4ca0d76b49bf8d0018e121f01c49028", size = 1659453, upload-time = "2025-05-22T13:32:55.558Z" }, - { url = "https://files.pythonhosted.org/packages/42/72/5affb1ce92b7a6becee17921de7c6b521a48fa61fc3d36d9f1eea2cf83f5/numcodecs-0.16.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:179ca7bf3525a0f7379df7767d87dd495253de44597cb7e511198b28b09da633", size = 1143932, upload-time = "2025-05-22T13:32:56.908Z" }, - { url = "https://files.pythonhosted.org/packages/e3/f1/b092679d84c67c6ed62e4df5781d89bbb089f24a0df4187cbab9db51cf6b/numcodecs-0.16.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e2babbb50bf348ae982818d5560af330eab0dcd925fb0e49509785ad57d11db", size = 8187716, upload-time = "2025-05-22T13:32:58.421Z" }, - { url = "https://files.pythonhosted.org/packages/a8/e8/86e7741adb43261aff409b53c53c8bac2797bfca055d64dd65dc731d5141/numcodecs-0.16.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4b29d8d3284b72bfad4fb83d672a17f497ae86ee1ef8087bac7222b620d3d91", size = 8728650, upload-time = "2025-05-22T13:33:00.337Z" }, - { url = "https://files.pythonhosted.org/packages/21/03/87c5c217232aa3515d350728c6dcefca252fa582246100ef68a51fbda456/numcodecs-0.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:06489635f43e1a959aea73cb830d78cf3adb07ac5f34daccb92091e4d9ac6b07", size = 785553, upload-time = "2025-05-22T13:33:02.587Z" }, + { url = 
"https://files.pythonhosted.org/packages/03/c3/5470273d6d5c986521140ccec6476664ea4e03c0cfc51b370fb03368bb41/numcodecs-0.16.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:30f04c2b7bb802133866e7fb554d47943864f977dfe8a95c814eb801c797df3c", size = 1668488, upload-time = "2025-08-13T16:09:08.942Z" }, + { url = "https://files.pythonhosted.org/packages/db/bf/cc1aaea87371097d6b5236ec44f8eb96387b52204b4e671fac716e5de325/numcodecs-0.16.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ee0b2776cf47b7702ba0ccc0b6afaad28afbf8d5bb7b9a5274b5f08ecc651b2", size = 1155083, upload-time = "2025-08-13T16:09:10.641Z" }, + { url = "https://files.pythonhosted.org/packages/6b/c1/ba5ab0cf4c4d737635d20d8b72a61c26f8f99c0529606dfbfa3e5d3a4221/numcodecs-0.16.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c8e94f0e90aaf4d01b2e26476d3b081c2cf8c17036af45e04e735de9c0cf64f", size = 8260568, upload-time = "2025-08-13T16:09:12.063Z" }, + { url = "https://files.pythonhosted.org/packages/3b/64/7177bf632520705893683fa4ca202ed540450bf971c0453ad1351baa2007/numcodecs-0.16.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7b565b16366749011e290343617571db861b2b2e58b038697afde6d02f537c91", size = 8792262, upload-time = "2025-08-13T16:09:14.058Z" }, + { url = "https://files.pythonhosted.org/packages/10/90/df01799f4c1bb8618b842582d10d362829e980c4d5eb9701c1aeadf5c4e3/numcodecs-0.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:e4cfdde4e99bf47580f4eb3a876630c73ba14e4a1380fec5959ac727e22ce0d2", size = 803444, upload-time = "2025-08-13T16:09:16.09Z" }, + { url = "https://files.pythonhosted.org/packages/b6/e3/f61c422259a4b6c8c2496d284f85ed17f8686b3a53feb797d3bd66ef499c/numcodecs-0.16.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0f965ccb2f6d215ffd2e3239ec02e33139d7ce311ff49704d340704b81dda653", size = 1664476, upload-time = "2025-08-13T16:09:17.327Z" }, + { url = 
"https://files.pythonhosted.org/packages/b9/a7/fa4d66b86e277643d135af263efc0dd1f98cf1228d3b4554b843c0c1a09b/numcodecs-0.16.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b4bc110b73d59de7f46310c680f075f9007ead915174c895368274c646c9ea74", size = 1148718, upload-time = "2025-08-13T16:09:19.075Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/9656a6b0ed7250ca3a5c126a6077a29398c3dca9176224dba4634847a4a4/numcodecs-0.16.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51615cf2811343a8a3fa42953cb4120ac7187875a161260444f53ada5710063e", size = 8205330, upload-time = "2025-08-13T16:09:20.904Z" }, + { url = "https://files.pythonhosted.org/packages/55/7f/0ab8db32ef9b51c60f7b759c2b155e1edcb08febb508c22a9d04b19ec735/numcodecs-0.16.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a18ced1ecbdd0e9ee716820dbb3a094c896eed8005273bbcab9980bdac270ae", size = 8750769, upload-time = "2025-08-13T16:09:22.516Z" }, + { url = "https://files.pythonhosted.org/packages/f0/0c/25f96c7969bdbfcc1427dc82eba92f2ef4df84c63369c95ab99af6404c23/numcodecs-0.16.2-cp313-cp313-win_amd64.whl", hash = "sha256:f640ed8406e1eb5806787a3e5be223d455b75c99eb2088a290947ed6dbd77e8e", size = 800281, upload-time = "2025-08-13T16:09:24.691Z" }, ] [package.optional-dependencies] @@ -2766,142 +3525,120 @@ crc32c = [ [[package]] name = "numpy" -version = "2.2.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, - { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, - { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, - { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, - { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, - { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, - { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 
15505511, upload-time = "2025-05-17T21:36:29.965Z" }, - { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, - { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, - { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, - { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, - { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, - { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, - { url = 
"https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, - { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, - { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, - { url = 
"https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, - { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, - { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, - { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, - { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, - { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, - { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, - { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, +version = "1.26.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129, upload-time = "2024-02-06T00:26:44.495Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901, upload-time = "2024-02-05T23:55:32.801Z" }, + { url = "https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868, upload-time = "2024-02-05T23:55:56.28Z" }, + { url = "https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109, upload-time = "2024-02-05T23:56:20.368Z" }, + { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613, upload-time = "2024-02-05T23:56:56.054Z" }, + { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172, upload-time = "2024-02-05T23:57:21.56Z" }, + { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643, upload-time = "2024-02-05T23:57:56.585Z" }, + { url = 
"https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803, upload-time = "2024-02-05T23:58:08.963Z" }, + { url = "https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754, upload-time = "2024-02-05T23:58:36.364Z" }, ] [[package]] name = "nvidia-cublas-cu12" -version = "12.8.3.14" +version = "12.9.1.4" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/82/df/4b01f10069e23c641f116c62fc31e31e8dc361a153175d81561d15c8143b/nvidia_cublas_cu12-12.8.3.14-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:3f0e05e7293598cf61933258b73e66a160c27d59c4422670bf0b79348c04be44", size = 609620630, upload-time = "2025-01-23T17:55:00.753Z" }, + { url = "https://files.pythonhosted.org/packages/77/3c/aa88abe01f3be3d1f8f787d1d33dc83e76fec05945f9a28fbb41cfb99cd5/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:453611eb21a7c1f2c2156ed9f3a45b691deda0440ec550860290dc901af5b4c2", size = 581242350, upload-time = "2025-06-05T20:04:51.979Z" }, ] [[package]] name = "nvidia-cuda-cupti-cu12" -version = "12.8.57" +version = "12.9.79" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/6f/3683ecf4e38931971946777d231c2df00dd5c1c4c2c914c42ad8f9f4dca6/nvidia_cuda_cupti_cu12-12.8.57-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e0b2eb847de260739bee4a3f66fac31378f4ff49538ff527a38a01a9a39f950", size = 10237547, upload-time = "2025-01-23T17:47:56.863Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/2e/b84e32197e33f39907b455b83395a017e697c07a449a2b15fd07fc1c9981/nvidia_cuda_cupti_cu12-12.9.79-py3-none-manylinux_2_25_x86_64.whl", hash = "sha256:096bcf334f13e1984ba36685ad4c1d6347db214de03dbb6eebb237b41d9d934f", size = 10814997, upload-time = "2025-06-05T20:01:10.168Z" }, ] [[package]] name = "nvidia-cuda-nvrtc-cu12" -version = "12.8.61" +version = "12.9.86" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d4/22/32029d4583f7b19cfe75c84399cbcfd23f2aaf41c66fc8db4da460104fff/nvidia_cuda_nvrtc_cu12-12.8.61-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a0fa9c2a21583105550ebd871bd76e2037205d56f33f128e69f6d2a55e0af9ed", size = 88024585, upload-time = "2025-01-23T17:50:10.722Z" }, + { url = "https://files.pythonhosted.org/packages/b8/85/e4af82cc9202023862090bfca4ea827d533329e925c758f0cde964cb54b7/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:210cf05005a447e29214e9ce50851e83fc5f4358df8b453155d5e1918094dcb4", size = 89568129, upload-time = "2025-06-05T20:02:41.973Z" }, ] [[package]] name = "nvidia-cuda-runtime-cu12" -version = "12.8.57" +version = "12.9.79" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/16/f6/0e1ef31f4753a44084310ba1a7f0abaf977ccd810a604035abb43421c057/nvidia_cuda_runtime_cu12-12.8.57-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75342e28567340b7428ce79a5d6bb6ca5ff9d07b69e7ce00d2c7b4dc23eff0be", size = 954762, upload-time = "2025-01-23T17:47:22.21Z" }, + { url = "https://files.pythonhosted.org/packages/bc/46/a92db19b8309581092a3add7e6fceb4c301a3fd233969856a8cbf042cd3c/nvidia_cuda_runtime_cu12-12.9.79-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:25bba2dfb01d48a9b59ca474a1ac43c6ebf7011f1b0b8cc44f54eb6ac48a96c3", size = 3493179, upload-time = 
"2025-06-05T20:00:53.735Z" }, ] [[package]] name = "nvidia-cudnn-cu12" -version = "9.7.1.26" +version = "9.10.2.21" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/25/dc/dc825c4b1c83b538e207e34f48f86063c88deaa35d46c651c7c181364ba2/nvidia_cudnn_cu12-9.7.1.26-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:6d011159a158f3cfc47bf851aea79e31bcff60d530b70ef70474c84cac484d07", size = 726851421, upload-time = "2025-02-06T22:18:29.812Z" }, + { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" }, ] [[package]] name = "nvidia-cufft-cu12" -version = "11.3.3.41" +version = "11.4.1.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/26/b53c493c38dccb1f1a42e1a21dc12cba2a77fbe36c652f7726d9ec4aba28/nvidia_cufft_cu12-11.3.3.41-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:da650080ab79fcdf7a4b06aa1b460e99860646b176a43f6208099bdc17836b6a", size = 
193118795, upload-time = "2025-01-23T17:56:30.536Z" }, + { url = "https://files.pythonhosted.org/packages/95/f4/61e6996dd20481ee834f57a8e9dca28b1869366a135e0d42e2aa8493bdd4/nvidia_cufft_cu12-11.4.1.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c67884f2a7d276b4b80eb56a79322a95df592ae5e765cf1243693365ccab4e28", size = 200877592, upload-time = "2025-06-05T20:05:45.862Z" }, ] [[package]] name = "nvidia-cufile-cu12" -version = "1.13.0.11" +version = "1.14.1.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e5/9c/1f3264d0a84c8a031487fb7f59780fc78fa6f1c97776233956780e3dc3ac/nvidia_cufile_cu12-1.13.0.11-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:483f434c541806936b98366f6d33caef5440572de8ddf38d453213729da3e7d4", size = 1197801, upload-time = "2025-01-23T17:57:07.247Z" }, + { url = "https://files.pythonhosted.org/packages/ad/28/b960e06d705a440c030edd84e16888ee14c743390bdb2a6368e92ffe8ef8/nvidia_cufile_cu12-1.14.1.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9552e2231792e94b1ff17bc99e958cc0e6bbbaa4a9d91fa2dbeed97716628fe6", size = 1210714, upload-time = "2025-06-05T20:06:11.898Z" }, ] [[package]] name = "nvidia-curand-cu12" -version = "10.3.9.55" +version = "10.3.10.19" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/fc/7be5d0082507269bb04ac07cc614c84b78749efb96e8cf4100a8a1178e98/nvidia_curand_cu12-10.3.9.55-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8387d974240c91f6a60b761b83d4b2f9b938b7e0b9617bae0f0dafe4f5c36b86", size = 63618038, upload-time = "2025-01-23T17:57:41.838Z" }, + { url = "https://files.pythonhosted.org/packages/31/44/193a0e171750ca9f8320626e8a1f2381e4077a65e69e2fb9708bd479e34a/nvidia_curand_cu12-10.3.10.19-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:49b274db4780d421bd2ccd362e1415c13887c53c214f0d4b761752b8f9f6aa1e", size = 
68295626, upload-time = "2025-05-01T19:39:38.885Z" }, ] [[package]] name = "nvidia-cusolver-cu12" -version = "11.7.2.55" +version = "11.7.5.82" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, - { name = "nvidia-cusparse-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/08/953675873a136d96bb12f93b49ba045d1107bc94d2551c52b12fa6c7dec3/nvidia_cusolver_cu12-11.7.2.55-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4d1354102f1e922cee9db51920dba9e2559877cf6ff5ad03a00d853adafb191b", size = 260373342, upload-time = "2025-01-23T17:58:56.406Z" }, + { url = "https://files.pythonhosted.org/packages/33/40/79b0c64d44d6c166c0964ec1d803d067f4a145cca23e23925fd351d0e642/nvidia_cusolver_cu12-11.7.5.82-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:15da72d1340d29b5b3cf3fd100e3cd53421dde36002eda6ed93811af63c40d88", size = 338117415, upload-time = "2025-06-05T20:07:16.809Z" }, ] [[package]] name = "nvidia-cusparse-cu12" -version = "12.5.7.53" +version = 
"12.5.10.65" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/ab/31e8149c66213b846c082a3b41b1365b831f41191f9f40c6ddbc8a7d550e/nvidia_cusparse_cu12-12.5.7.53-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c1b61eb8c85257ea07e9354606b26397612627fdcd327bfd91ccf6155e7c86d", size = 292064180, upload-time = "2025-01-23T18:00:23.233Z" }, + { url = "https://files.pythonhosted.org/packages/12/46/b0fd4b04f86577921feb97d8e2cf028afe04f614d17fb5013de9282c9216/nvidia_cusparse_cu12-12.5.10.65-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:73060ce019ac064a057267c585bf1fd5a353734151f87472ff02b2c5c9984e78", size = 366465088, upload-time = "2025-06-05T20:08:20.413Z" }, ] [[package]] name = "nvidia-cusparselt-cu12" -version = "0.6.3" +version = "0.7.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/9a/72ef35b399b0e183bc2e8f6f558036922d453c4d8237dab26c666a04244b/nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46", size = 156785796, upload-time = "2024-10-15T21:29:17.709Z" }, + { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" }, ] [[package]] @@ -2915,65 +3652,90 @@ wheels = [ 
[[package]] name = "nvidia-modelopt" -version = "0.31.0" +version = "0.33.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "ninja", marker = "sys_platform != 'darwin'" }, { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "nvidia-ml-py", marker = "sys_platform != 'darwin'" }, { name = "nvidia-modelopt-core", marker = "sys_platform != 'darwin'" }, { name = "packaging", marker = "sys_platform != 'darwin'" }, + { name = "pulp", marker = "sys_platform != 'darwin'" }, { name = "pydantic", marker = "sys_platform != 'darwin'" }, + { name = "regex", marker = "sys_platform != 'darwin'" }, { name = "rich", marker = "sys_platform != 'darwin'" }, + { name = "safetensors", marker = "sys_platform != 'darwin'" }, { name = "scipy", marker = "sys_platform != 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "torchprofile", marker = "sys_platform != 'darwin'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "tqdm", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/94/d7/9201b1618ccf6babea08b07fb3f3266f319f1993afc5f1812f4bf9603080/nvidia_modelopt-0.31.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:77495c50700ef9ed1782f4999e17265751a0f4002a7f1185dee7bb46d5d05039", size = 717204, upload-time = "2025-06-05T19:35:29.397Z" }, - { url = 
"https://files.pythonhosted.org/packages/6f/6a/db5b41905cccc3f1d36b12cbb3f84dc40d0c352935d88f164047c6059f4d/nvidia_modelopt-0.31.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8b1905122b0615aeff78f65aa39920c3971d6ebd1966b7ac57ee8da271d49913", size = 717202, upload-time = "2025-06-05T19:35:53.512Z" }, -] - -[package.optional-dependencies] -torch = [ - { name = "nvidia-ml-py", marker = "sys_platform != 'darwin'" }, - { name = "pulp", marker = "sys_platform != 'darwin'" }, - { name = "regex", marker = "sys_platform != 'darwin'" }, - { name = "safetensors", marker = "sys_platform != 'darwin'" }, - { name = "torch", marker = "sys_platform != 'darwin'" }, - { name = "torchprofile", marker = "sys_platform != 'darwin'" }, - { name = "torchvision", marker = "sys_platform != 'darwin'" }, + { url = "https://files.pythonhosted.org/packages/ca/cb/4af39357792a96f334c7877ea0380c9337aec210ff4794a7dd95beb7c349/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:6c51091683a117cd40fdb96a0ec28579f2276f6b627db7ccddc370df544e1dd7", size = 751683, upload-time = "2025-08-12T18:37:48.832Z" }, + { url = "https://files.pythonhosted.org/packages/0a/b1/fc2f468d140ef58e90fac584759d0cc449db9bc4f64668cdff750ef38fef/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:ef78a98901890f265596ec413dffac177d4a1865201d89a14f29f4fa0cf8e710", size = 751683, upload-time = "2025-08-12T18:36:59.964Z" }, ] [[package]] name = "nvidia-modelopt-core" -version = "0.31.0" +version = "0.33.1" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/9f/4114d67eeb2cb3abd8b955ebb73c654d73994f16b7bec0d12884764f5807/nvidia_modelopt_core-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3a03f3b081322bdad71982de37898e5407c6a85c65d02a26470b735d8a454e74", size = 1335144, upload-time = "2025-06-05T19:39:28.776Z" }, - { url = 
"https://files.pythonhosted.org/packages/60/0b/81540db9bac816fa814baec0a7df976101d756e154764494dad8850035cb/nvidia_modelopt_core-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4739f00f1797699fe4b9c256a5b75114b66e22749250dc87128491a8bdb2ce5a", size = 1359154, upload-time = "2025-06-05T19:41:04.928Z" }, + { url = "https://files.pythonhosted.org/packages/9b/b5/ba79b1c52b634b24e45dca409f133f947217a5c7ec5c256266e4ec5fa3eb/nvidia_modelopt_core-0.33.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:1ddd9279d8312f8e972b302692a26e6180f1c9fd277232f5925a5589f42b1b76", size = 1338081, upload-time = "2025-08-12T18:40:36.156Z" }, + { url = "https://files.pythonhosted.org/packages/13/40/4427583475dfd8eb1b8c7522d75d4d059f0512ff03dcc62d6986a22ab918/nvidia_modelopt_core-0.33.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:69d5ace564f2b056c916117be2023f2b7fc01cd1501073915e6b2ced2b8a5394", size = 1363366, upload-time = "2025-08-12T18:39:28.854Z" }, ] [[package]] name = "nvidia-nccl-cu12" -version = "2.26.2" +version = "2.27.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755, upload-time = "2025-03-13T00:29:55.296Z" }, + { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" }, ] [[package]] name = "nvidia-nvjitlink-cu12" -version = "12.8.61" +version = "12.9.86" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/03/f8/9d85593582bd99b8d7c65634d2304780aefade049b2b94d96e44084be90b/nvidia_nvjitlink_cu12-12.8.61-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:45fd79f2ae20bd67e8bc411055939049873bfd8fac70ff13bd4865e0b9bdab17", size = 39243473, upload-time = "2025-01-23T18:03:03.509Z" }, + { url = "https://files.pythonhosted.org/packages/46/0c/c75bbfb967457a0b7670b8ad267bfc4fffdf341c074e0a80db06c24ccfd4/nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:e3f1171dbdc83c5932a45f0f4c99180a70de9bd2718c1ab77d14104f6d7147f9", size = 39748338, upload-time = "2025-06-05T20:10:25.613Z" }, +] + +[[package]] +name = "nvidia-nvshmem-cu12" +version = "3.3.24" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/ce/6b73d2c3cdeb2202a4a79115e543087ca024306c4d290fffd5cfc8d5009d/nvidia_nvshmem_cu12-3.3.24-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f8666e4d2adffe846c264a836263b53fa5d7b725f0c508e36b40c3d4f9665e2a", size = 138990167, upload-time = "2025-08-22T19:56:19.001Z" }, + { url = "https://files.pythonhosted.org/packages/ac/49/7e1e3e98f5b8ae79f21260f9a90d8d985e5ad67b69b90b09456fc3c01a18/nvidia_nvshmem_cu12-3.3.24-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0032831c0ec4fdc64c3bd8daeae588f6647ee4afc3376c5871218546acac0e81", size = 139158697, upload-time = "2025-08-22T19:56:39.552Z" }, ] [[package]] name = "nvidia-nvtx-cu12" -version = "12.8.55" +version = "12.9.79" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/ed/bb230dce7741f2778ba2ae3e8778fdb8bc58eee9fd95f07bf7b2d18e8081/nvidia_nvtx_cu12-12.9.79-py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fec150986817f2b4e7eed72ed059f2dcb9ba3856b9a96134e448eac946a6952f", size = 85504, upload-time = "2025-06-05T20:03:10.21Z" }, +] 
+ +[[package]] +name = "nvidia-resiliency-ext" +version = "0.4.1" source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "nvidia-ml-py" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pynvml" }, + { name = "pyyaml" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/cd/0e8c51b2ae3a58f054f2e7fe91b82d201abfb30167f2431e9bd92d532f42/nvidia_nvtx_cu12-12.8.55-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dd0780f1a55c21d8e06a743de5bd95653de630decfff40621dbde78cc307102", size = 89896, upload-time = "2025-01-23T17:50:44.487Z" }, + { url = "https://files.pythonhosted.org/packages/70/05/38d491962273c7905708762279f440520eb79f3c00b67a023497215ad023/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:b3bd5f01535574b16d0f38bca6e39afe3806c4a2896eee1b321cd944e00025a7", size = 444570, upload-time = "2025-07-17T03:50:58.877Z" }, + { url = "https://files.pythonhosted.org/packages/18/8b/4cb8aa2bbdf3705d3034c3f3dacdadb03b3b7dd3dc7f5200e64663fb477f/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_x86_64.whl", hash = "sha256:ca9f8de465af345952bedbea53c90c0e2323d88cfd830ded0e806fad91845c0e", size = 450280, upload-time = "2025-07-17T03:49:55.327Z" }, ] [[package]] @@ -2990,16 +3752,16 @@ wheels = [ [[package]] name = "nvtx" -version = "0.2.12" +version = "0.2.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/56/05/160dc24b6cd1e21e5b00d55a46abac5802ed7c15c675e6ce25febad2b0d7/nvtx-0.2.12.tar.gz", hash = "sha256:b871fae9b80b004e624b5755291799794287016fa6a0c8fd0fb3255393ae3bc8", size = 110848, upload-time = 
"2025-05-26T10:32:33.824Z" } +sdist = { url = "https://files.pythonhosted.org/packages/97/02/b3fd3da4ba51764cfc0e4d2b22d5a61511fa79d825344d4704f8429c0bd6/nvtx-0.2.13.tar.gz", hash = "sha256:9db7ba135168e14e1f038866100bf8ed42d3e00b404e9bc7b6280ee3af828b92", size = 112104, upload-time = "2025-08-05T03:27:16.383Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/08/e4/944e63039a0d652c843ecffb42700e2b4f596b745ac9ac6ebed937f1bce5/nvtx-0.2.12-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ea22a86eca22fd52e3c2905654182da1fcebea6f0107e87d7dc4ec6871604ca", size = 539647, upload-time = "2025-05-25T08:52:12.911Z" }, - { url = "https://files.pythonhosted.org/packages/cf/7b/6e25716c92039a3ecc2f6f4e1380b5492b0d23af78ea862cb84e8ffe0d7b/nvtx-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a279d880c27ec8c72632a0685456c170e7b12da2839c861ee461c121692aea6", size = 543614, upload-time = "2025-05-25T08:44:44.678Z" }, - { url = "https://files.pythonhosted.org/packages/7d/96/eb1078d7509b72e3e4b6dd7ff12a698951e81dcc5f20a3ad7f35d7455700/nvtx-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:2f93e07add2544a85c202b3c710945b54b3abb6660a6a7e447395cb024938b35", size = 98894, upload-time = "2025-05-25T08:42:59.068Z" }, - { url = "https://files.pythonhosted.org/packages/55/78/88563935649f9202735ac5686fc451d3fa9f34e6592787ba224244c3570a/nvtx-0.2.12-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:871e54f95929a6c7c39b85d4111bf6af8ab43325bbc36c97a179270443896ef7", size = 520074, upload-time = "2025-05-25T08:52:50.144Z" }, - { url = "https://files.pythonhosted.org/packages/86/0c/62b1f76c84a8bed267421d11114953b5da631daeb0ec7894a91252f79b5d/nvtx-0.2.12-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2b82ad84ac8d5408851947d1d2cef3e8e627627cc2290e5150c8af0dda1e3f63", size = 524516, upload-time = "2025-05-25T08:45:06.598Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/41/e74ec826e1585ad6d31f41de96f6faae8ffc712a45c2b880baca4ae87a64/nvtx-0.2.12-cp313-cp313-win_amd64.whl", hash = "sha256:a37e063c3c745a4c6b561993a2dae2f67fcc26f2a2c2653f24eeae5810a2180d", size = 97070, upload-time = "2025-05-25T08:43:41.323Z" }, + { url = "https://files.pythonhosted.org/packages/c5/73/ad21e09dc2534f1e9723bbe5871fa5f03361ac51ca4d411fea6f765b5b6a/nvtx-0.2.13-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3435cbbffa132f6aaba3abdb01e71a1b961a20858b4cb791883895a25b9305d6", size = 539358, upload-time = "2025-08-04T19:33:16.494Z" }, + { url = "https://files.pythonhosted.org/packages/12/ab/762da984e7671f7c34ae87e5b70523c3eeb4563759268bfaea07c97f32a6/nvtx-0.2.13-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:453d838dd1424a04303281ee57a73e2b8dca0e03039bc609a945861b8fe7d7d9", size = 545588, upload-time = "2025-08-04T19:37:40.64Z" }, + { url = "https://files.pythonhosted.org/packages/2a/b6/55bc5916386db70b93cbf543b1e880ead786d9ff0cdcfa262f5a2af46c74/nvtx-0.2.13-cp312-cp312-win_amd64.whl", hash = "sha256:0722d743e0e41e1fb866ebe6446e0cd0d268ca8671313f8da4f8c969956b74d3", size = 99123, upload-time = "2025-08-04T19:24:24.391Z" }, + { url = "https://files.pythonhosted.org/packages/41/73/98c0669d5f9387a36d56b0e62ea3919124dd8dd7582d896ed1cae2998f57/nvtx-0.2.13-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1561d2111c698b1b1075899ff9c3fa7ba83603fc27c2e8ef567de6bbbe85ce1", size = 519840, upload-time = "2025-08-04T19:34:00.877Z" }, + { url = "https://files.pythonhosted.org/packages/14/4b/21e975997def8a387543ba2bbe227551ad466781c39fc67f37f53555f37e/nvtx-0.2.13-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:edd7b729ed0211350258a21dd13422f59bc521de2b2fd21feb6c177af492f4e1", size = 524711, upload-time = "2025-08-04T19:38:03.559Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/d7/0ca146afd875f1e02636323840960071f768b5d8ba3e7d37f2ac9192bfd9/nvtx-0.2.13-cp313-cp313-win_amd64.whl", hash = "sha256:f0524bb71443d5a1f19a6409a9a81405fc437e53c5edfc4c44b6f4504ccf46e3", size = 97317, upload-time = "2025-08-04T19:24:46.391Z" }, ] [[package]] @@ -3042,9 +3804,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/84/dd/6abe5d7bd23f5ed3ade8352abf30dff1c7a9e97fc1b0a17b5d7c726e98a9/onnx-1.18.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a69afd0baa372162948b52c13f3aa2730123381edf926d7ef3f68ca7cec6d0d0", size = 15865055, upload-time = "2025-05-12T22:03:06.663Z" }, ] +[[package]] +name = "onnx-ir" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy" }, + { name = "onnx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/14/4a003926218f8edee6da19546f69a1831b74cdd993eaf5ff50a2fb168e70/onnx_ir-0.1.7.tar.gz", hash = "sha256:4734b7587807ca657158b042c138879c3f454756fae74e949f6c99f0107d8df6", size = 107944, upload-time = "2025-08-22T15:01:16.383Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/cc/35e8490072f61aa54221742b4c9a0c947ef78ead5034481ca9ac655024ef/onnx_ir-0.1.7-py3-none-any.whl", hash = "sha256:8a0441909676f1ab6b22186d79f8d0faf8739177f50d15baeac88e7e1255aae8", size = 124382, upload-time = "2025-08-22T15:01:15.063Z" }, +] + +[[package]] +name = "onnxscript" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "ml-dtypes", 
version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy" }, + { name = "onnx" }, + { name = "onnx-ir" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/9f/45aed9951d3fa50a97b910487186ef9c15ad08d3c9cb3605aabd99f65f92/onnxscript-0.4.0.tar.gz", hash = "sha256:de618eeb6e0c57f5a70f85909ab1f829cbb2053ad55f8f2fcc2701fa29b7adfc", size = 567393, upload-time = "2025-08-22T21:05:46.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/b6/c1ca749dfe58c23da542b1c71c334a3faa7191647b3335623870ef371e93/onnxscript-0.4.0-py3-none-any.whl", hash = "sha256:3d41f5b190bab9f1a4ace6075c6960ad676719766240b3450f292173bde90242", size = 660871, upload-time = "2025-08-22T21:05:48.332Z" }, +] + [[package]] name = "openai" -version = "1.91.0" +version = "2.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -3056,9 +3852,33 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/e2/a22f2973b729eff3f1f429017bdf717930c5de0fbf9e14017bae330e4e7a/openai-1.91.0.tar.gz", hash = "sha256:d6b07730d2f7c6745d0991997c16f85cddfc90ddcde8d569c862c30716b9fc90", size = 472529, upload-time = "2025-06-23T18:27:10.961Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/c7/e42bcd89dfd47fec8a30b9e20f93e512efdbfbb3391b05bbb79a2fb295fa/openai-2.6.0.tar.gz", hash = "sha256:f119faf7fc07d7e558c1e7c32c873e241439b01bd7480418234291ee8c8f4b9d", size = 592904, upload-time = "2025-10-20T17:17:24.588Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c0/0a/58e9dcd34abe273eaeac3807a8483073767b5609d01bb78ea2f048e515a0/openai-2.6.0-py3-none-any.whl", hash = "sha256:f33fa12070fe347b5787a7861c8dd397786a4a17e1c3186e239338dac7e2e743", size = 1005403, upload-time = "2025-10-20T17:17:22.091Z" }, +] + +[[package]] +name = 
"openai-harmony" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/92/94/01509d510bebf6606614e51113e5a415ced15b8f34aa98a8bf2539314650/openai_harmony-0.0.4.tar.gz", hash = "sha256:5c67ac6df349236fb7b64f57c3dbb0273efcdca24314daa108f2a482c427106c", size = 279848, upload-time = "2025-08-09T01:43:24.974Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/d2/f99bdd6fc737d6b3cf0df895508d621fc9a386b375a1230ee81d46c5436e/openai-1.91.0-py3-none-any.whl", hash = "sha256:207f87aa3bc49365e014fac2f7e291b99929f4fe126c4654143440e0ad446a5f", size = 735837, upload-time = "2025-06-23T18:27:08.913Z" }, + { url = "https://files.pythonhosted.org/packages/a2/3e/6bb75a4d15a6aad0ba1b23193ca0d2c202cc1f3364ba840833374b7c9c1a/openai_harmony-0.0.4-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:3586d90c899cd41f8624e7b82a48c289f6e4be56c66304ecaf3a0ba88963a73f", size = 2772770, upload-time = "2025-08-09T01:43:14.839Z" }, + { url = "https://files.pythonhosted.org/packages/34/41/2f256fba6762d028ed6f935f0015f71d81927a52b9a1c873679a409b72bf/openai_harmony-0.0.4-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:ef21a1e2384a65c62d5ec5e1cded9fe026f1d032d5c5d725110d1a8d330d8f54", size = 2633682, upload-time = "2025-08-09T01:43:12.681Z" }, + { url = "https://files.pythonhosted.org/packages/05/88/ade63bd8f36603610040e7cc086bc134d57a99a742e05f7fcddfdf822ee1/openai_harmony-0.0.4-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cf2344366f10981bbc0f6d9949a0b2bb87151d209ed295943ed6ad8eda37932", size = 2963206, upload-time = "2025-08-09T01:43:02.433Z" }, + { url = "https://files.pythonhosted.org/packages/8e/ef/a65a0ff177fdf67bc0afd18bb9e7ad690d1b553a8eb5ebf27f601b22dbd0/openai_harmony-0.0.4-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d8d16d84702059833fb03b841b28c25600c54e83cadccef79af44e1c81166b1", size = 
2724854, upload-time = "2025-08-09T01:43:04.606Z" }, + { url = "https://files.pythonhosted.org/packages/8a/a1/ebaf0f55601a98609641283884d52dbfe9a1cf34b04f1cf80acb1560ab74/openai_harmony-0.0.4-cp38-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:97f1fe3909733212cc6b36f0f199b1421a9c57b79ec665f0322bd604cec47340", size = 2984312, upload-time = "2025-08-09T01:43:08.908Z" }, + { url = "https://files.pythonhosted.org/packages/45/24/246f6f470bfbc89a117714b68f27cdaee12b31166237a227cc657780cc1d/openai_harmony-0.0.4-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:567cc568b6bf7b4d041b0c9aa7d6b2c9394f8af6065bc87fa6d23f207b5af9a7", size = 3447870, upload-time = "2025-08-09T01:43:06.734Z" }, + { url = "https://files.pythonhosted.org/packages/1f/ec/dcdcace0ffcf3a532cca910e0c351b62d3a7decf0b091ea8cf856d2a67a6/openai_harmony-0.0.4-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:31e9bcac0902a309e2fc688e52f247eec7fffcd00d17e958b9a83a8fea6519c2", size = 3049306, upload-time = "2025-08-09T01:43:11.019Z" }, + { url = "https://files.pythonhosted.org/packages/ad/39/172f1048d935db1523a82b45fee5231ad6c622645e566706e6bcf3731da8/openai_harmony-0.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:96a63199c0d81095b5d5d1ae8ca82b64c1c13d18d4e30323ae9e8ab31bc80a3d", size = 3121347, upload-time = "2025-08-09T01:43:16.705Z" }, + { url = "https://files.pythonhosted.org/packages/6b/36/8ee4ca5d0b25587121fd3621e6a6106fba80218cb6d159e1670aeb2b22ef/openai_harmony-0.0.4-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:d38f2639f6bf7c3c34a5dfd79e29075811ae2fa9b895a63e76767f74a47a971e", size = 2952326, upload-time = "2025-08-09T01:43:18.841Z" }, + { url = "https://files.pythonhosted.org/packages/ae/a0/ec8906393968679e269e23e957e11ff419978d1d077fb9af9561b161c988/openai_harmony-0.0.4-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:038f1d6772d1be5213b36ae76e5d042022395ec35c428a73ccb8b839b2cecf6a", size = 3015832, upload-time = 
"2025-08-09T01:43:21.076Z" }, + { url = "https://files.pythonhosted.org/packages/a8/bd/aa9e6e5cf140716dbcae17402fac2a81a9ebb3f934059ac0eec61cb447fc/openai_harmony-0.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:15e6d53a66502491a3675a536df30e271f976e6c5efe68250a65191efcb85c4f", size = 3221129, upload-time = "2025-08-09T01:43:23.146Z" }, + { url = "https://files.pythonhosted.org/packages/5a/22/2c7e1728689c7fa98a259ca2d14e718ea7af964516a617a9784f0d35d88a/openai_harmony-0.0.4-cp38-abi3-win32.whl", hash = "sha256:b9ee9e9ab6a237cebbe16563c787a6e83f3fcc034075c3d321dab94448426282", size = 2077125, upload-time = "2025-08-09T01:43:28.91Z" }, + { url = "https://files.pythonhosted.org/packages/e7/93/3a08a06ff3bde7f4c264f86d437e6a5c49792a6e362383b3a669f39c9690/openai_harmony-0.0.4-cp38-abi3-win_amd64.whl", hash = "sha256:746f751de5033b3dbcfcd4a726a4c56ce452c593ad3d54472d8597ce8d8b6d44", size = 2444821, upload-time = "2025-08-09T01:43:26.846Z" }, ] [[package]] @@ -3103,170 +3923,141 @@ wheels = [ [[package]] name = "opentelemetry-api" -version = "1.34.1" +version = "1.38.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/4d/5e/94a8cb759e4e409022229418294e098ca7feca00eb3c467bb20cbd329bda/opentelemetry_api-1.34.1.tar.gz", hash = "sha256:64f0bd06d42824843731d05beea88d4d4b6ae59f9fe347ff7dfa2cc14233bbb3", size = 64987, upload-time = "2025-06-10T08:55:19.818Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/3a/2ba85557e8dc024c0842ad22c570418dc02c36cbd1ab4b832a93edf071b8/opentelemetry_api-1.34.1-py3-none-any.whl", hash = "sha256:b7df4cb0830d5a6c29ad0c0691dbae874d8daefa934b8b1d642de48323d32a8c", size = 65767, upload-time = "2025-06-10T08:54:56.717Z" }, -] - -[[package]] -name = "opentelemetry-exporter-otlp" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = 
"opentelemetry-exporter-otlp-proto-grpc" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/44/ba/786b4de7e39d88043622d901b92c4485835f43e0be76c2824d2687911bc2/opentelemetry_exporter_otlp-1.34.1.tar.gz", hash = "sha256:71c9ad342d665d9e4235898d205db17c5764cd7a69acb8a5dcd6d5e04c4c9988", size = 6173, upload-time = "2025-06-10T08:55:21.595Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/c1/259b8d8391c968e8f005d8a0ccefcb41aeef64cf55905cd0c0db4e22aaee/opentelemetry_exporter_otlp-1.34.1-py3-none-any.whl", hash = "sha256:f4a453e9cde7f6362fd4a090d8acf7881d1dc585540c7b65cbd63e36644238d4", size = 7040, upload-time = "2025-06-10T08:54:59.655Z" }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-common" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "opentelemetry-proto" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/86/f0/ff235936ee40db93360233b62da932d4fd9e8d103cd090c6bcb9afaf5f01/opentelemetry_exporter_otlp_proto_common-1.34.1.tar.gz", hash = "sha256:b59a20a927facd5eac06edaf87a07e49f9e4a13db487b7d8a52b37cb87710f8b", size = 20817, upload-time = "2025-06-10T08:55:22.55Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/72/e8/8b292a11cc8d8d87ec0c4089ae21b6a58af49ca2e51fa916435bc922fdc7/opentelemetry_exporter_otlp_proto_common-1.34.1-py3-none-any.whl", hash = "sha256:8e2019284bf24d3deebbb6c59c71e6eef3307cd88eff8c633e061abba33f7e87", size = 18834, upload-time = "2025-06-10T08:55:00.806Z" }, -] - -[[package]] -name = "opentelemetry-exporter-otlp-proto-grpc" -version = "1.34.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "googleapis-common-protos" }, - { name = "grpcio" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, - { name = "opentelemetry-sdk" }, - { name = "typing-extensions" 
}, -] -sdist = { url = "https://files.pythonhosted.org/packages/41/f7/bb63837a3edb9ca857aaf5760796874e7cecddc88a2571b0992865a48fb6/opentelemetry_exporter_otlp_proto_grpc-1.34.1.tar.gz", hash = "sha256:7c841b90caa3aafcfc4fee58487a6c71743c34c6dc1787089d8b0578bbd794dd", size = 22566, upload-time = "2025-06-10T08:55:23.214Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/d8/0f354c375628e048bd0570645b310797299754730079853095bf000fba69/opentelemetry_api-1.38.0.tar.gz", hash = "sha256:f4c193b5e8acb0912b06ac5b16321908dd0843d75049c091487322284a3eea12", size = 65242, upload-time = "2025-10-16T08:35:50.25Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/42/0a4dd47e7ef54edf670c81fc06a83d68ea42727b82126a1df9dd0477695d/opentelemetry_exporter_otlp_proto_grpc-1.34.1-py3-none-any.whl", hash = "sha256:04bb8b732b02295be79f8a86a4ad28fae3d4ddb07307a98c7aa6f331de18cca6", size = 18615, upload-time = "2025-06-10T08:55:02.214Z" }, + { url = "https://files.pythonhosted.org/packages/ae/a2/d86e01c28300bd41bab8f18afd613676e2bd63515417b77636fc1add426f/opentelemetry_api-1.38.0-py3-none-any.whl", hash = "sha256:2891b0197f47124454ab9f0cf58f3be33faca394457ac3e09daba13ff50aa582", size = 65947, upload-time = "2025-10-16T08:35:30.23Z" }, ] [[package]] -name = "opentelemetry-exporter-otlp-proto-http" -version = "1.34.1" +name = "opentelemetry-exporter-prometheus" +version = "0.59b0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "googleapis-common-protos" }, { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp-proto-common" }, - { name = "opentelemetry-proto" }, { name = "opentelemetry-sdk" }, - { name = "requests" }, - { name = "typing-extensions" }, + { name = "prometheus-client" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/19/8f/954bc725961cbe425a749d55c0ba1df46832a5999eae764d1a7349ac1c29/opentelemetry_exporter_otlp_proto_http-1.34.1.tar.gz", hash = 
"sha256:aaac36fdce46a8191e604dcf632e1f9380c7d5b356b27b3e0edb5610d9be28ad", size = 15351, upload-time = "2025-06-10T08:55:24.657Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/07/39370ec7eacfca10462121a0e036b66ccea3a616bf6ae6ea5fdb72e5009d/opentelemetry_exporter_prometheus-0.59b0.tar.gz", hash = "sha256:d64f23c49abb5a54e271c2fbc8feacea0c394a30ec29876ab5ef7379f08cf3d7", size = 14972, upload-time = "2025-10-16T08:35:55.973Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/79/54/b05251c04e30c1ac70cf4a7c5653c085dfcf2c8b98af71661d6a252adc39/opentelemetry_exporter_otlp_proto_http-1.34.1-py3-none-any.whl", hash = "sha256:5251f00ca85872ce50d871f6d3cc89fe203b94c3c14c964bbdc3883366c705d8", size = 17744, upload-time = "2025-06-10T08:55:03.802Z" }, + { url = "https://files.pythonhosted.org/packages/05/ea/3005a732002242fd86203989520bdd5a752e1fd30dc225d5d45751ea19fb/opentelemetry_exporter_prometheus-0.59b0-py3-none-any.whl", hash = "sha256:71ced23207abd15b30d1fe4e7e910dcaa7c2ff1f24a6ffccbd4fdded676f541b", size = 13017, upload-time = "2025-10-16T08:35:37.253Z" }, ] [[package]] name = "opentelemetry-proto" -version = "1.34.1" +version = "1.38.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "protobuf" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/66/b3/c3158dd012463bb7c0eb7304a85a6f63baeeb5b4c93a53845cf89f848c7e/opentelemetry_proto-1.34.1.tar.gz", hash = "sha256:16286214e405c211fc774187f3e4bbb1351290b8dfb88e8948af209ce85b719e", size = 34344, upload-time = "2025-06-10T08:55:32.25Z" } +sdist = { url = "https://files.pythonhosted.org/packages/51/14/f0c4f0f6371b9cb7f9fa9ee8918bfd59ac7040c7791f1e6da32a1839780d/opentelemetry_proto-1.38.0.tar.gz", hash = "sha256:88b161e89d9d372ce723da289b7da74c3a8354a8e5359992be813942969ed468", size = 46152, upload-time = "2025-10-16T08:36:01.612Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/28/ab/4591bfa54e946350ce8b3f28e5c658fe9785e7cd11e9c11b1671a867822b/opentelemetry_proto-1.34.1-py3-none-any.whl", hash = "sha256:eb4bb5ac27f2562df2d6857fc557b3a481b5e298bc04f94cc68041f00cebcbd2", size = 55692, upload-time = "2025-06-10T08:55:14.904Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6a/82b68b14efca5150b2632f3692d627afa76b77378c4999f2648979409528/opentelemetry_proto-1.38.0-py3-none-any.whl", hash = "sha256:b6ebe54d3217c42e45462e2a1ae28c3e2bf2ec5a5645236a490f55f45f1a0a18", size = 72535, upload-time = "2025-10-16T08:35:45.749Z" }, ] [[package]] name = "opentelemetry-sdk" -version = "1.34.1" +version = "1.38.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "opentelemetry-semantic-conventions" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/41/fe20f9036433da8e0fcef568984da4c1d1c771fa072ecd1a4d98779dccdd/opentelemetry_sdk-1.34.1.tar.gz", hash = "sha256:8091db0d763fcd6098d4781bbc80ff0971f94e260739aa6afe6fd379cdf3aa4d", size = 159441, upload-time = "2025-06-10T08:55:33.028Z" } +sdist = { url = "https://files.pythonhosted.org/packages/85/cb/f0eee1445161faf4c9af3ba7b848cc22a50a3d3e2515051ad8628c35ff80/opentelemetry_sdk-1.38.0.tar.gz", hash = "sha256:93df5d4d871ed09cb4272305be4d996236eedb232253e3ab864c8620f051cebe", size = 171942, upload-time = "2025-10-16T08:36:02.257Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/07/1b/def4fe6aa73f483cabf4c748f4c25070d5f7604dcc8b52e962983491b29e/opentelemetry_sdk-1.34.1-py3-none-any.whl", hash = "sha256:308effad4059562f1d92163c61c8141df649da24ce361827812c40abb2a1e96e", size = 118477, upload-time = "2025-06-10T08:55:16.02Z" }, + { url = "https://files.pythonhosted.org/packages/2f/2e/e93777a95d7d9c40d270a371392b6d6f1ff170c2a3cb32d6176741b5b723/opentelemetry_sdk-1.38.0-py3-none-any.whl", hash = 
"sha256:1c66af6564ecc1553d72d811a01df063ff097cdc82ce188da9951f93b8d10f6b", size = 132349, upload-time = "2025-10-16T08:35:46.995Z" }, ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.55b1" +version = "0.59b0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "opentelemetry-api" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/5d/f0/f33458486da911f47c4aa6db9bda308bb80f3236c111bf848bd870c16b16/opentelemetry_semantic_conventions-0.55b1.tar.gz", hash = "sha256:ef95b1f009159c28d7a7849f5cbc71c4c34c845bb514d66adfdf1b3fff3598b3", size = 119829, upload-time = "2025-06-10T08:55:33.881Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1a/89/267b0af1b1d0ba828f0e60642b6a5116ac1fd917cde7fc02821627029bd1/opentelemetry_semantic_conventions-0.55b1-py3-none-any.whl", hash = "sha256:5da81dfdf7d52e3d37f8fe88d5e771e191de924cfff5f550ab0b8f7b2409baed", size = 196223, upload-time = "2025-06-10T08:55:17.638Z" }, -] - -[[package]] -name = "opentelemetry-semantic-conventions-ai" -version = "0.4.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/ba/2405abde825cf654d09ba16bfcfb8c863156bccdc47d1f2a86df6331e7bb/opentelemetry_semantic_conventions_ai-0.4.9.tar.gz", hash = "sha256:54a0b901959e2de5124384925846bac2ea0a6dab3de7e501ba6aecf5e293fe04", size = 4920, upload-time = "2025-05-16T10:20:54.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/34/98/f5196ba0f4105a4790cec8c6671cf676c96dfa29bfedfe3c4f112bf4e6ad/opentelemetry_semantic_conventions_ai-0.4.9-py3-none-any.whl", hash = "sha256:71149e46a72554ae17de46bca6c11ba540c19c89904bd4cc3111aac6edf10315", size = 5617, upload-time = "2025-05-16T10:20:53.062Z" }, -] - -[[package]] -name = "outlines" -version = "0.1.11" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "airportsdata" }, - { name = "cloudpickle" }, - { name = 
"diskcache" }, - { name = "interegular" }, - { name = "jinja2" }, - { name = "jsonschema" }, - { name = "lark" }, - { name = "nest-asyncio" }, - { name = "numpy" }, - { name = "outlines-core" }, - { name = "pycountry" }, - { name = "pydantic" }, - { name = "referencing" }, - { name = "requests" }, - { name = "torch" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ac/d0/d59ae830bf7026425942899e3d48e77b58a713cff946a695e5405808da1b/outlines-0.1.11.tar.gz", hash = "sha256:0997bd9da1cc050e430bd08995dc7d4bd855918bafa4531e49d3f37110a23aba", size = 2488858, upload-time = "2024-12-13T07:24:08.426Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/13/b4/99ea4a122bef60e3fd6402d19665aff1f928e0daf8fac3044d0b73f72003/outlines-0.1.11-py3-none-any.whl", hash = "sha256:f5a5f2242ed9802d3aab7a92789bf4008d734c576be9258cc0a297f690124727", size = 87623, upload-time = "2024-12-13T07:24:05.817Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/40/bc/8b9ad3802cd8ac6583a4eb7de7e5d7db004e89cb7efe7008f9c8a537ee75/opentelemetry_semantic_conventions-0.59b0.tar.gz", hash = "sha256:7a6db3f30d70202d5bf9fa4b69bc866ca6a30437287de6c510fb594878aed6b0", size = 129861, upload-time = "2025-10-16T08:36:03.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/7d/c88d7b15ba8fe5c6b8f93be50fc11795e9fc05386c44afaf6b76fe191f9b/opentelemetry_semantic_conventions-0.59b0-py3-none-any.whl", hash = "sha256:35d3b8833ef97d614136e253c1da9342b4c3c083bbaf29ce31d572a1c3825eed", size = 207954, upload-time = "2025-10-16T08:35:48.054Z" }, +] + +[[package]] +name = "orjson" +version = "3.11.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/be/4d/8df5f83256a809c22c4d6792ce8d43bb503be0fb7a8e4da9025754b09658/orjson-3.11.3.tar.gz", hash = "sha256:1c0603b1d2ffcd43a411d64797a19556ef76958aef1c182f22dc30860152a98a", size = 5482394, upload-time = 
"2025-08-26T17:46:43.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/b0/a7edab2a00cdcb2688e1c943401cb3236323e7bfd2839815c6131a3742f4/orjson-3.11.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8c752089db84333e36d754c4baf19c0e1437012242048439c7e80eb0e6426e3b", size = 238259, upload-time = "2025-08-26T17:45:15.093Z" }, + { url = "https://files.pythonhosted.org/packages/e1/c6/ff4865a9cc398a07a83342713b5932e4dc3cb4bf4bc04e8f83dedfc0d736/orjson-3.11.3-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:9b8761b6cf04a856eb544acdd82fc594b978f12ac3602d6374a7edb9d86fd2c2", size = 127633, upload-time = "2025-08-26T17:45:16.417Z" }, + { url = "https://files.pythonhosted.org/packages/6e/e6/e00bea2d9472f44fe8794f523e548ce0ad51eb9693cf538a753a27b8bda4/orjson-3.11.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b13974dc8ac6ba22feaa867fc19135a3e01a134b4f7c9c28162fed4d615008a", size = 123061, upload-time = "2025-08-26T17:45:17.673Z" }, + { url = "https://files.pythonhosted.org/packages/54/31/9fbb78b8e1eb3ac605467cb846e1c08d0588506028b37f4ee21f978a51d4/orjson-3.11.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f83abab5bacb76d9c821fd5c07728ff224ed0e52d7a71b7b3de822f3df04e15c", size = 127956, upload-time = "2025-08-26T17:45:19.172Z" }, + { url = "https://files.pythonhosted.org/packages/36/88/b0604c22af1eed9f98d709a96302006915cfd724a7ebd27d6dd11c22d80b/orjson-3.11.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e6fbaf48a744b94091a56c62897b27c31ee2da93d826aa5b207131a1e13d4064", size = 130790, upload-time = "2025-08-26T17:45:20.586Z" }, + { url = "https://files.pythonhosted.org/packages/0e/9d/1c1238ae9fffbfed51ba1e507731b3faaf6b846126a47e9649222b0fd06f/orjson-3.11.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc779b4f4bba2847d0d2940081a7b6f7b5877e05408ffbb74fa1faf4a136c424", size = 132385, 
upload-time = "2025-08-26T17:45:22.036Z" }, + { url = "https://files.pythonhosted.org/packages/a3/b5/c06f1b090a1c875f337e21dd71943bc9d84087f7cdf8c6e9086902c34e42/orjson-3.11.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd4b909ce4c50faa2192da6bb684d9848d4510b736b0611b6ab4020ea6fd2d23", size = 135305, upload-time = "2025-08-26T17:45:23.4Z" }, + { url = "https://files.pythonhosted.org/packages/a0/26/5f028c7d81ad2ebbf84414ba6d6c9cac03f22f5cd0d01eb40fb2d6a06b07/orjson-3.11.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:524b765ad888dc5518bbce12c77c2e83dee1ed6b0992c1790cc5fb49bb4b6667", size = 132875, upload-time = "2025-08-26T17:45:25.182Z" }, + { url = "https://files.pythonhosted.org/packages/fe/d4/b8df70d9cfb56e385bf39b4e915298f9ae6c61454c8154a0f5fd7efcd42e/orjson-3.11.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:84fd82870b97ae3cdcea9d8746e592b6d40e1e4d4527835fc520c588d2ded04f", size = 130940, upload-time = "2025-08-26T17:45:27.209Z" }, + { url = "https://files.pythonhosted.org/packages/da/5e/afe6a052ebc1a4741c792dd96e9f65bf3939d2094e8b356503b68d48f9f5/orjson-3.11.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbecb9709111be913ae6879b07bafd4b0785b44c1eb5cac8ac76da048b3885a1", size = 403852, upload-time = "2025-08-26T17:45:28.478Z" }, + { url = "https://files.pythonhosted.org/packages/f8/90/7bbabafeb2ce65915e9247f14a56b29c9334003536009ef5b122783fe67e/orjson-3.11.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9dba358d55aee552bd868de348f4736ca5a4086d9a62e2bfbbeeb5629fe8b0cc", size = 146293, upload-time = "2025-08-26T17:45:29.86Z" }, + { url = "https://files.pythonhosted.org/packages/27/b3/2d703946447da8b093350570644a663df69448c9d9330e5f1d9cce997f20/orjson-3.11.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eabcf2e84f1d7105f84580e03012270c7e97ecb1fb1618bda395061b2a84a049", size = 135470, upload-time = "2025-08-26T17:45:31.243Z" }, + { url = 
"https://files.pythonhosted.org/packages/38/70/b14dcfae7aff0e379b0119c8a812f8396678919c431efccc8e8a0263e4d9/orjson-3.11.3-cp312-cp312-win32.whl", hash = "sha256:3782d2c60b8116772aea8d9b7905221437fdf53e7277282e8d8b07c220f96cca", size = 136248, upload-time = "2025-08-26T17:45:32.567Z" }, + { url = "https://files.pythonhosted.org/packages/35/b8/9e3127d65de7fff243f7f3e53f59a531bf6bb295ebe5db024c2503cc0726/orjson-3.11.3-cp312-cp312-win_amd64.whl", hash = "sha256:79b44319268af2eaa3e315b92298de9a0067ade6e6003ddaef72f8e0bedb94f1", size = 131437, upload-time = "2025-08-26T17:45:34.949Z" }, + { url = "https://files.pythonhosted.org/packages/51/92/a946e737d4d8a7fd84a606aba96220043dcc7d6988b9e7551f7f6d5ba5ad/orjson-3.11.3-cp312-cp312-win_arm64.whl", hash = "sha256:0e92a4e83341ef79d835ca21b8bd13e27c859e4e9e4d7b63defc6e58462a3710", size = 125978, upload-time = "2025-08-26T17:45:36.422Z" }, + { url = "https://files.pythonhosted.org/packages/fc/79/8932b27293ad35919571f77cb3693b5906cf14f206ef17546052a241fdf6/orjson-3.11.3-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:af40c6612fd2a4b00de648aa26d18186cd1322330bd3a3cc52f87c699e995810", size = 238127, upload-time = "2025-08-26T17:45:38.146Z" }, + { url = "https://files.pythonhosted.org/packages/1c/82/cb93cd8cf132cd7643b30b6c5a56a26c4e780c7a145db6f83de977b540ce/orjson-3.11.3-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:9f1587f26c235894c09e8b5b7636a38091a9e6e7fe4531937534749c04face43", size = 127494, upload-time = "2025-08-26T17:45:39.57Z" }, + { url = "https://files.pythonhosted.org/packages/a4/b8/2d9eb181a9b6bb71463a78882bcac1027fd29cf62c38a40cc02fc11d3495/orjson-3.11.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61dcdad16da5bb486d7227a37a2e789c429397793a6955227cedbd7252eb5a27", size = 123017, upload-time = "2025-08-26T17:45:40.876Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/14/a0e971e72d03b509190232356d54c0f34507a05050bd026b8db2bf2c192c/orjson-3.11.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:11c6d71478e2cbea0a709e8a06365fa63da81da6498a53e4c4f065881d21ae8f", size = 127898, upload-time = "2025-08-26T17:45:42.188Z" }, + { url = "https://files.pythonhosted.org/packages/8e/af/dc74536722b03d65e17042cc30ae586161093e5b1f29bccda24765a6ae47/orjson-3.11.3-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ff94112e0098470b665cb0ed06efb187154b63649403b8d5e9aedeb482b4548c", size = 130742, upload-time = "2025-08-26T17:45:43.511Z" }, + { url = "https://files.pythonhosted.org/packages/62/e6/7a3b63b6677bce089fe939353cda24a7679825c43a24e49f757805fc0d8a/orjson-3.11.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae8b756575aaa2a855a75192f356bbda11a89169830e1439cfb1a3e1a6dde7be", size = 132377, upload-time = "2025-08-26T17:45:45.525Z" }, + { url = "https://files.pythonhosted.org/packages/fc/cd/ce2ab93e2e7eaf518f0fd15e3068b8c43216c8a44ed82ac2b79ce5cef72d/orjson-3.11.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9416cc19a349c167ef76135b2fe40d03cea93680428efee8771f3e9fb66079d", size = 135313, upload-time = "2025-08-26T17:45:46.821Z" }, + { url = "https://files.pythonhosted.org/packages/d0/b4/f98355eff0bd1a38454209bbc73372ce351ba29933cb3e2eba16c04b9448/orjson-3.11.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b822caf5b9752bc6f246eb08124c3d12bf2175b66ab74bac2ef3bbf9221ce1b2", size = 132908, upload-time = "2025-08-26T17:45:48.126Z" }, + { url = "https://files.pythonhosted.org/packages/eb/92/8f5182d7bc2a1bed46ed960b61a39af8389f0ad476120cd99e67182bfb6d/orjson-3.11.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:414f71e3bdd5573893bf5ecdf35c32b213ed20aa15536fe2f588f946c318824f", size = 130905, upload-time = "2025-08-26T17:45:49.414Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/60/c41ca753ce9ffe3d0f67b9b4c093bdd6e5fdb1bc53064f992f66bb99954d/orjson-3.11.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:828e3149ad8815dc14468f36ab2a4b819237c155ee1370341b91ea4c8672d2ee", size = 403812, upload-time = "2025-08-26T17:45:51.085Z" }, + { url = "https://files.pythonhosted.org/packages/dd/13/e4a4f16d71ce1868860db59092e78782c67082a8f1dc06a3788aef2b41bc/orjson-3.11.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ac9e05f25627ffc714c21f8dfe3a579445a5c392a9c8ae7ba1d0e9fb5333f56e", size = 146277, upload-time = "2025-08-26T17:45:52.851Z" }, + { url = "https://files.pythonhosted.org/packages/8d/8b/bafb7f0afef9344754a3a0597a12442f1b85a048b82108ef2c956f53babd/orjson-3.11.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e44fbe4000bd321d9f3b648ae46e0196d21577cf66ae684a96ff90b1f7c93633", size = 135418, upload-time = "2025-08-26T17:45:54.806Z" }, + { url = "https://files.pythonhosted.org/packages/60/d4/bae8e4f26afb2c23bea69d2f6d566132584d1c3a5fe89ee8c17b718cab67/orjson-3.11.3-cp313-cp313-win32.whl", hash = "sha256:2039b7847ba3eec1f5886e75e6763a16e18c68a63efc4b029ddf994821e2e66b", size = 136216, upload-time = "2025-08-26T17:45:57.182Z" }, + { url = "https://files.pythonhosted.org/packages/88/76/224985d9f127e121c8cad882cea55f0ebe39f97925de040b75ccd4b33999/orjson-3.11.3-cp313-cp313-win_amd64.whl", hash = "sha256:29be5ac4164aa8bdcba5fa0700a3c9c316b411d8ed9d39ef8a882541bd452fae", size = 131362, upload-time = "2025-08-26T17:45:58.56Z" }, + { url = "https://files.pythonhosted.org/packages/e2/cf/0dce7a0be94bd36d1346be5067ed65ded6adb795fdbe3abd234c8d576d01/orjson-3.11.3-cp313-cp313-win_arm64.whl", hash = "sha256:18bd1435cb1f2857ceb59cfb7de6f92593ef7b831ccd1b9bfb28ca530e539dce", size = 125989, upload-time = "2025-08-26T17:45:59.95Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/77/d3b1fef1fc6aaeed4cbf3be2b480114035f4df8fa1a99d2dac1d40d6e924/orjson-3.11.3-cp314-cp314-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:cf4b81227ec86935568c7edd78352a92e97af8da7bd70bdfdaa0d2e0011a1ab4", size = 238115, upload-time = "2025-08-26T17:46:01.669Z" }, + { url = "https://files.pythonhosted.org/packages/e4/6d/468d21d49bb12f900052edcfbf52c292022d0a323d7828dc6376e6319703/orjson-3.11.3-cp314-cp314-macosx_15_0_arm64.whl", hash = "sha256:bc8bc85b81b6ac9fc4dae393a8c159b817f4c2c9dee5d12b773bddb3b95fc07e", size = 127493, upload-time = "2025-08-26T17:46:03.466Z" }, + { url = "https://files.pythonhosted.org/packages/67/46/1e2588700d354aacdf9e12cc2d98131fb8ac6f31ca65997bef3863edb8ff/orjson-3.11.3-cp314-cp314-manylinux_2_34_aarch64.whl", hash = "sha256:88dcfc514cfd1b0de038443c7b3e6a9797ffb1b3674ef1fd14f701a13397f82d", size = 122998, upload-time = "2025-08-26T17:46:04.803Z" }, + { url = "https://files.pythonhosted.org/packages/3b/94/11137c9b6adb3779f1b34fd98be51608a14b430dbc02c6d41134fbba484c/orjson-3.11.3-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:d61cd543d69715d5fc0a690c7c6f8dcc307bc23abef9738957981885f5f38229", size = 132915, upload-time = "2025-08-26T17:46:06.237Z" }, + { url = "https://files.pythonhosted.org/packages/10/61/dccedcf9e9bcaac09fdabe9eaee0311ca92115699500efbd31950d878833/orjson-3.11.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2b7b153ed90ababadbef5c3eb39549f9476890d339cf47af563aea7e07db2451", size = 130907, upload-time = "2025-08-26T17:46:07.581Z" }, + { url = "https://files.pythonhosted.org/packages/0e/fd/0e935539aa7b08b3ca0f817d73034f7eb506792aae5ecc3b7c6e679cdf5f/orjson-3.11.3-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:7909ae2460f5f494fecbcd10613beafe40381fd0316e35d6acb5f3a05bfda167", size = 403852, upload-time = "2025-08-26T17:46:08.982Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/2b/50ae1a5505cd1043379132fdb2adb8a05f37b3e1ebffe94a5073321966fd/orjson-3.11.3-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:2030c01cbf77bc67bee7eef1e7e31ecf28649353987775e3583062c752da0077", size = 146309, upload-time = "2025-08-26T17:46:10.576Z" }, + { url = "https://files.pythonhosted.org/packages/cd/1d/a473c158e380ef6f32753b5f39a69028b25ec5be331c2049a2201bde2e19/orjson-3.11.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a0169ebd1cbd94b26c7a7ad282cf5c2744fce054133f959e02eb5265deae1872", size = 135424, upload-time = "2025-08-26T17:46:12.386Z" }, + { url = "https://files.pythonhosted.org/packages/da/09/17d9d2b60592890ff7382e591aa1d9afb202a266b180c3d4049b1ec70e4a/orjson-3.11.3-cp314-cp314-win32.whl", hash = "sha256:0c6d7328c200c349e3a4c6d8c83e0a5ad029bdc2d417f234152bf34842d0fc8d", size = 136266, upload-time = "2025-08-26T17:46:13.853Z" }, + { url = "https://files.pythonhosted.org/packages/15/58/358f6846410a6b4958b74734727e582ed971e13d335d6c7ce3e47730493e/orjson-3.11.3-cp314-cp314-win_amd64.whl", hash = "sha256:317bbe2c069bbc757b1a2e4105b64aacd3bc78279b66a6b9e51e846e4809f804", size = 131351, upload-time = "2025-08-26T17:46:15.27Z" }, + { url = "https://files.pythonhosted.org/packages/28/01/d6b274a0635be0468d4dbd9cafe80c47105937a0d42434e805e67cd2ed8b/orjson-3.11.3-cp314-cp314-win_arm64.whl", hash = "sha256:e8f6a7a27d7b7bec81bd5924163e9af03d49bbb63013f107b48eb5d16db711bc", size = 125985, upload-time = "2025-08-26T17:46:16.67Z" }, ] [[package]] name = "outlines-core" -version = "0.1.26" +version = "0.2.11" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "interegular" }, - { name = "jsonschema" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d3/f3/274d07f4702728b43581235a77e545ec602b25f9b0098b288a0f3052521d/outlines_core-0.1.26.tar.gz", hash = "sha256:481c4301341e77cc8f1832d616784adb4d461b4fec65878e7c0d2cba7163a189", size = 75139, upload-time = 
"2024-12-12T23:38:50.703Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/d3/e04e9145f8f806723dec9b9e5227ad695a3efcd3ced7794cf7c22b15df5e/outlines_core-0.2.11.tar.gz", hash = "sha256:dfce56f717ff5083e54cbcfdb66cad243365437fccbb5509adaa7e31e030f1d8", size = 197263, upload-time = "2025-05-19T10:12:51.719Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/86/0fb40746e579db38d89f127122a3900d9e0350f76aae8cb61adeaff44cc2/outlines_core-0.1.26-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f54633bca50055d42ea4d94ae06dcbe52d3d76a9b621b75723b1177d0d952953", size = 321874, upload-time = "2024-12-12T23:38:26.834Z" }, - { url = "https://files.pythonhosted.org/packages/ab/0c/b91f7bc03843796c1d643ee030b6cd8fd5a8ba2cd4856c855f140c878976/outlines_core-0.1.26-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9525321b48700dcaaabf60bcdc951e45f9357ba3fb3e1bfc81b662d7d4170e7c", size = 301995, upload-time = "2024-12-12T23:38:29.625Z" }, - { url = "https://files.pythonhosted.org/packages/ad/db/fa91a2d54288b900de82d86eda3adb2417b3b5b2db6256854a5e8bc85c32/outlines_core-0.1.26-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00f409f72c11f6ffadb57066950dd384d5388015028c1a1a615c9a64988dae3e", size = 321050, upload-time = "2024-12-12T23:38:32.274Z" }, - { url = "https://files.pythonhosted.org/packages/e2/1d/a36292b6198986bd9c3ff8c24355deb82ed5475403379ee40b5b5473e2e3/outlines_core-0.1.26-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e86a1bb46adc5cbf6dfd7a7fe4105e0e2a4c6e041732a053126b41c521a1f223", size = 343201, upload-time = "2024-12-12T23:38:34.631Z" }, - { url = "https://files.pythonhosted.org/packages/08/63/5dd2b5a364412f674b6edcb59b0c21513bdb07cdcc7613b064c1a0660d01/outlines_core-0.1.26-cp312-cp312-win32.whl", hash = "sha256:19f462f6b00935708677ad27cb4df55e0e17f6ffe713ab750f5f2683b090f95d", size = 233970, upload-time = "2024-12-12T23:38:37.318Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/56/8adf0b7446d1e975c2314454813c59eb7b195889908a2932ed34148c113c/outlines_core-0.1.26-cp312-cp312-win_amd64.whl", hash = "sha256:9b36bff12779e58883747116893a17b3551bbd10865878b951b03a44d112229a", size = 243578, upload-time = "2024-12-12T23:38:39.964Z" }, + { url = "https://files.pythonhosted.org/packages/5f/2c/c7636823244c70e2960060bf9bd978248dffb55c5e7c91c46d18354b2a24/outlines_core-0.2.11-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4a9db4872bae083631d720994f4cee603bce0536b33d5a988814576863b657cf", size = 1957668, upload-time = "2025-05-19T10:12:18.29Z" }, + { url = "https://files.pythonhosted.org/packages/c7/09/5c62047da139d722317a444a4d01cd5f11943a8c2eaecce784341dd0844a/outlines_core-0.2.11-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8359a45c59f6a8f2eb717245806501a59044c75f6ea8bd08faaa131cc8cdec45", size = 2130493, upload-time = "2025-05-19T10:12:19.537Z" }, + { url = "https://files.pythonhosted.org/packages/89/7a/d6a2810f90e37d550168e0c0a9a915086ea721444727e3ca2c630898d1ef/outlines_core-0.2.11-cp312-cp312-macosx_15_0_arm64.whl", hash = "sha256:5d26a46591377340e0b870b8a96ea8341058341a62ee0bded9098e0c88dd24f4", size = 1956804, upload-time = "2025-05-19T10:12:20.755Z" }, + { url = "https://files.pythonhosted.org/packages/ca/ea/339e6c273b5581128c3b7ca27d428d8993c3085912af1a467aa32ef0e9d1/outlines_core-0.2.11-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:ae460a34675fb11d92a5c605a480fbae4cd6c1b2d11b3698da64a7fcaba64dcf", size = 2127085, upload-time = "2025-05-19T10:12:22.02Z" }, + { url = "https://files.pythonhosted.org/packages/92/c7/a65d1fddf49830ebc41422294eacde35286d9f68994a8aa905cb14f5aade/outlines_core-0.2.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86df9740368866295077346440d911df4972da2b3f1f54b8125e6f329e8a8891", size = 2287677, upload-time = "2025-05-19T10:12:24.24Z" }, + { url = 
"https://files.pythonhosted.org/packages/23/79/8795aed8be9b77dd69d78e7cfbfcf28c179e6b08da6e56bbbf48a09fe55f/outlines_core-0.2.11-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:96ce4dd78f106799be4a0a5795cefd1352806162973756a4b6fce4bb6eddd7e4", size = 2113000, upload-time = "2025-05-19T10:12:25.446Z" }, + { url = "https://files.pythonhosted.org/packages/59/e3/cbe9294b06d92ee1892dbb6f2125d833d68e8629d45d080d6daba54eec2d/outlines_core-0.2.11-cp312-cp312-win32.whl", hash = "sha256:358db161cce3650ba822e118dcf0a1efa571c7deb4864ab9d64ca2c9cca7425d", size = 1765703, upload-time = "2025-05-19T10:12:26.693Z" }, + { url = "https://files.pythonhosted.org/packages/1d/c9/ed3cf362515fac16e313368b9b2f2497051f4ded88679205830b6f889f54/outlines_core-0.2.11-cp312-cp312-win_amd64.whl", hash = "sha256:231f9d20d2630c70665345821780d7808b29539620a75c99f65113b518c51032", size = 2060945, upload-time = "2025-05-19T10:12:28.294Z" }, + { url = "https://files.pythonhosted.org/packages/11/58/df6f57546f7792c990a4380ceaf99243a0b26b24c199e34e0a9277c89976/outlines_core-0.2.11-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0907ff25d79edbf8650268028de85a1b41b38696f147059e007da4626a1031f1", size = 1957172, upload-time = "2025-05-19T10:12:29.737Z" }, + { url = "https://files.pythonhosted.org/packages/9b/cf/b07e33c44544e7865ec481554788807dfa6ad10fd86191ad21f2200f145e/outlines_core-0.2.11-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:f4146da5957f97550eebd19e80635e48035886fd10f03e9735cc111caaf74e93", size = 2130284, upload-time = "2025-05-19T10:12:31.408Z" }, + { url = "https://files.pythonhosted.org/packages/83/70/8f981706e2620914c48fd1edb42f9409d76b84c72149d48e89d14820fab6/outlines_core-0.2.11-cp313-cp313-macosx_15_0_arm64.whl", hash = "sha256:8776a6db8843187c90e4c54bf94510cda68ca7a11c9b48d90587179fd3224bc2", size = 1956727, upload-time = "2025-05-19T10:12:32.996Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/de/fba234a9c3984408f017ee0b1ca2e9d6191f8086afa649d3e4b04ed055e2/outlines_core-0.2.11-cp313-cp313-macosx_15_0_x86_64.whl", hash = "sha256:d44f38a89028bed50494420b47d08ebefa78f34b129e2ea6383c801e5ba62c26", size = 2126905, upload-time = "2025-05-19T10:12:34.261Z" }, + { url = "https://files.pythonhosted.org/packages/87/96/7dcdc5198844145ab35528f9f93a58c3d47b87e54d0f79357c631d7b7a9a/outlines_core-0.2.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:daef6eaaf8c3403455ab5cbf265cb5c6838df571eb7c4b23cddac19cfc701726", size = 2287320, upload-time = "2025-05-19T10:12:35.515Z" }, + { url = "https://files.pythonhosted.org/packages/4d/68/b420b6a3beaadbf8e9f2a82132120027efd6424634013fbeca8c2fed7467/outlines_core-0.2.11-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:76b2512417c68863f8f227a080e87f755682dfd895e23b021121318be11da579", size = 2112861, upload-time = "2025-05-19T10:12:36.742Z" }, + { url = "https://files.pythonhosted.org/packages/78/d6/7c2a016f7a5eab2f3df2b3a258f270872c78fe0dd7d9fbee87429f1b6b1f/outlines_core-0.2.11-cp313-cp313-win32.whl", hash = "sha256:707eeb3d190485f55a27ad9a6ad70df86688fa2bf405894a118283be7f59bd55", size = 1765574, upload-time = "2025-05-19T10:12:37.98Z" }, + { url = "https://files.pythonhosted.org/packages/a5/39/4c07f1d1f8e6ed85db9fe73a021113795a05aae8a84f36f0bdebb08bfde8/outlines_core-0.2.11-cp313-cp313-win_amd64.whl", hash = "sha256:ad46698564c9b13cbfbc744067de12be73bd740d7b2de20ec6b979ad7511f7c9", size = 2060567, upload-time = "2025-05-19T10:12:39.228Z" }, ] [[package]] @@ -3280,7 +4071,7 @@ wheels = [ [[package]] name = "pandas" -version = "2.3.0" +version = "2.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, @@ -3288,42 +4079,28 @@ dependencies = [ { name = "pytz" }, { name = "tzdata" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/72/51/48f713c4c728d7c55ef7444ba5ea027c26998d96d1a40953b346438602fc/pandas-2.3.0.tar.gz", hash = "sha256:34600ab34ebf1131a7613a260a61dbe8b62c188ec0ea4c296da7c9a06b004133", size = 4484490, upload-time = "2025-06-05T03:27:54.133Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/94/46/24192607058dd607dbfacdd060a2370f6afb19c2ccb617406469b9aeb8e7/pandas-2.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2eb4728a18dcd2908c7fccf74a982e241b467d178724545a48d0caf534b38ebf", size = 11573865, upload-time = "2025-06-05T03:26:46.774Z" }, - { url = "https://files.pythonhosted.org/packages/9f/cc/ae8ea3b800757a70c9fdccc68b67dc0280a6e814efcf74e4211fd5dea1ca/pandas-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b9d8c3187be7479ea5c3d30c32a5d73d62a621166675063b2edd21bc47614027", size = 10702154, upload-time = "2025-06-05T16:50:14.439Z" }, - { url = "https://files.pythonhosted.org/packages/d8/ba/a7883d7aab3d24c6540a2768f679e7414582cc389876d469b40ec749d78b/pandas-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ff730713d4c4f2f1c860e36c005c7cefc1c7c80c21c0688fd605aa43c9fcf09", size = 11262180, upload-time = "2025-06-05T16:50:17.453Z" }, - { url = "https://files.pythonhosted.org/packages/01/a5/931fc3ad333d9d87b10107d948d757d67ebcfc33b1988d5faccc39c6845c/pandas-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba24af48643b12ffe49b27065d3babd52702d95ab70f50e1b34f71ca703e2c0d", size = 11991493, upload-time = "2025-06-05T03:26:51.813Z" }, - { url = "https://files.pythonhosted.org/packages/d7/bf/0213986830a92d44d55153c1d69b509431a972eb73f204242988c4e66e86/pandas-2.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:404d681c698e3c8a40a61d0cd9412cc7364ab9a9cc6e144ae2992e11a2e77a20", size = 12470733, upload-time = "2025-06-06T00:00:18.651Z" }, - { url = 
"https://files.pythonhosted.org/packages/a4/0e/21eb48a3a34a7d4bac982afc2c4eb5ab09f2d988bdf29d92ba9ae8e90a79/pandas-2.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6021910b086b3ca756755e86ddc64e0ddafd5e58e076c72cb1585162e5ad259b", size = 13212406, upload-time = "2025-06-05T03:26:55.992Z" }, - { url = "https://files.pythonhosted.org/packages/1f/d9/74017c4eec7a28892d8d6e31ae9de3baef71f5a5286e74e6b7aad7f8c837/pandas-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:094e271a15b579650ebf4c5155c05dcd2a14fd4fdd72cf4854b2f7ad31ea30be", size = 10976199, upload-time = "2025-06-05T03:26:59.594Z" }, - { url = "https://files.pythonhosted.org/packages/d3/57/5cb75a56a4842bbd0511c3d1c79186d8315b82dac802118322b2de1194fe/pandas-2.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c7e2fc25f89a49a11599ec1e76821322439d90820108309bf42130d2f36c983", size = 11518913, upload-time = "2025-06-05T03:27:02.757Z" }, - { url = "https://files.pythonhosted.org/packages/05/01/0c8785610e465e4948a01a059562176e4c8088aa257e2e074db868f86d4e/pandas-2.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c6da97aeb6a6d233fb6b17986234cc723b396b50a3c6804776351994f2a658fd", size = 10655249, upload-time = "2025-06-05T16:50:20.17Z" }, - { url = "https://files.pythonhosted.org/packages/e8/6a/47fd7517cd8abe72a58706aab2b99e9438360d36dcdb052cf917b7bf3bdc/pandas-2.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb32dc743b52467d488e7a7c8039b821da2826a9ba4f85b89ea95274f863280f", size = 11328359, upload-time = "2025-06-05T03:27:06.431Z" }, - { url = "https://files.pythonhosted.org/packages/2a/b3/463bfe819ed60fb7e7ddffb4ae2ee04b887b3444feee6c19437b8f834837/pandas-2.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:213cd63c43263dbb522c1f8a7c9d072e25900f6975596f883f4bebd77295d4f3", size = 12024789, upload-time = "2025-06-05T03:27:09.875Z" }, - { url = 
"https://files.pythonhosted.org/packages/04/0c/e0704ccdb0ac40aeb3434d1c641c43d05f75c92e67525df39575ace35468/pandas-2.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1d2b33e68d0ce64e26a4acc2e72d747292084f4e8db4c847c6f5f6cbe56ed6d8", size = 12480734, upload-time = "2025-06-06T00:00:22.246Z" }, - { url = "https://files.pythonhosted.org/packages/e9/df/815d6583967001153bb27f5cf075653d69d51ad887ebbf4cfe1173a1ac58/pandas-2.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:430a63bae10b5086995db1b02694996336e5a8ac9a96b4200572b413dfdfccb9", size = 13223381, upload-time = "2025-06-05T03:27:15.641Z" }, - { url = "https://files.pythonhosted.org/packages/79/88/ca5973ed07b7f484c493e941dbff990861ca55291ff7ac67c815ce347395/pandas-2.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4930255e28ff5545e2ca404637bcc56f031893142773b3468dc021c6c32a1390", size = 10970135, upload-time = "2025-06-05T03:27:24.131Z" }, - { url = "https://files.pythonhosted.org/packages/24/fb/0994c14d1f7909ce83f0b1fb27958135513c4f3f2528bde216180aa73bfc/pandas-2.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:f925f1ef673b4bd0271b1809b72b3270384f2b7d9d14a189b12b7fc02574d575", size = 12141356, upload-time = "2025-06-05T03:27:34.547Z" }, - { url = "https://files.pythonhosted.org/packages/9d/a2/9b903e5962134497ac4f8a96f862ee3081cb2506f69f8e4778ce3d9c9d82/pandas-2.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e78ad363ddb873a631e92a3c063ade1ecfb34cae71e9a2be6ad100f875ac1042", size = 11474674, upload-time = "2025-06-05T03:27:39.448Z" }, - { url = "https://files.pythonhosted.org/packages/81/3a/3806d041bce032f8de44380f866059437fb79e36d6b22c82c187e65f765b/pandas-2.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:951805d146922aed8357e4cc5671b8b0b9be1027f0619cea132a9f3f65f2f09c", size = 11439876, upload-time = "2025-06-05T03:27:43.652Z" }, - { url = 
"https://files.pythonhosted.org/packages/15/aa/3fc3181d12b95da71f5c2537c3e3b3af6ab3a8c392ab41ebb766e0929bc6/pandas-2.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a881bc1309f3fce34696d07b00f13335c41f5f5a8770a33b09ebe23261cfc67", size = 11966182, upload-time = "2025-06-05T03:27:47.652Z" }, - { url = "https://files.pythonhosted.org/packages/37/e7/e12f2d9b0a2c4a2cc86e2aabff7ccfd24f03e597d770abfa2acd313ee46b/pandas-2.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e1991bbb96f4050b09b5f811253c4f3cf05ee89a589379aa36cd623f21a31d6f", size = 12547686, upload-time = "2025-06-06T00:00:26.142Z" }, - { url = "https://files.pythonhosted.org/packages/39/c2/646d2e93e0af70f4e5359d870a63584dacbc324b54d73e6b3267920ff117/pandas-2.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:bb3be958022198531eb7ec2008cfc78c5b1eed51af8600c6c5d9160d89d8d249", size = 13231847, upload-time = "2025-06-05T03:27:51.465Z" }, -] - -[[package]] -name = "paramiko" -version = "3.5.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bcrypt" }, - { name = "cryptography" }, - { name = "pynacl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7d/15/ad6ce226e8138315f2451c2aeea985bf35ee910afb477bae7477dc3a8f3b/paramiko-3.5.1.tar.gz", hash = "sha256:b2c665bc45b2b215bd7d7f039901b14b067da00f3a11e6640995fd58f2664822", size = 1566110, upload-time = "2025-02-04T02:37:59.783Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/15/f8/c7bd0ef12954a81a1d3cea60a13946bd9a49a0036a5927770c461eade7ae/paramiko-3.5.1-py3-none-any.whl", hash = "sha256:43b9a0501fc2b5e70680388d9346cf252cfb7d00b0667c39e80eb43a408b8f61", size = 227298, upload-time = "2025-02-04T02:37:57.672Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/d1/6f/75aa71f8a14267117adeeed5d21b204770189c0a0025acbdc03c337b28fc/pandas-2.3.1.tar.gz", hash = "sha256:0a95b9ac964fe83ce317827f80304d37388ea77616b1425f0ae41c9d2d0d7bb2", size = 4487493, 
upload-time = "2025-07-07T19:20:04.079Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/de/b8445e0f5d217a99fe0eeb2f4988070908979bec3587c0633e5428ab596c/pandas-2.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:689968e841136f9e542020698ee1c4fbe9caa2ed2213ae2388dc7b81721510d3", size = 11588172, upload-time = "2025-07-07T19:18:52.054Z" }, + { url = "https://files.pythonhosted.org/packages/1e/e0/801cdb3564e65a5ac041ab99ea6f1d802a6c325bb6e58c79c06a3f1cd010/pandas-2.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:025e92411c16cbe5bb2a4abc99732a6b132f439b8aab23a59fa593eb00704232", size = 10717365, upload-time = "2025-07-07T19:18:54.785Z" }, + { url = "https://files.pythonhosted.org/packages/51/a5/c76a8311833c24ae61a376dbf360eb1b1c9247a5d9c1e8b356563b31b80c/pandas-2.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b7ff55f31c4fcb3e316e8f7fa194566b286d6ac430afec0d461163312c5841e", size = 11280411, upload-time = "2025-07-07T19:18:57.045Z" }, + { url = "https://files.pythonhosted.org/packages/da/01/e383018feba0a1ead6cf5fe8728e5d767fee02f06a3d800e82c489e5daaf/pandas-2.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7dcb79bf373a47d2a40cf7232928eb7540155abbc460925c2c96d2d30b006eb4", size = 11988013, upload-time = "2025-07-07T19:18:59.771Z" }, + { url = "https://files.pythonhosted.org/packages/5b/14/cec7760d7c9507f11c97d64f29022e12a6cc4fc03ac694535e89f88ad2ec/pandas-2.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:56a342b231e8862c96bdb6ab97170e203ce511f4d0429589c8ede1ee8ece48b8", size = 12767210, upload-time = "2025-07-07T19:19:02.944Z" }, + { url = "https://files.pythonhosted.org/packages/50/b9/6e2d2c6728ed29fb3d4d4d302504fb66f1a543e37eb2e43f352a86365cdf/pandas-2.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ca7ed14832bce68baef331f4d7f294411bed8efd032f8109d690df45e00c4679", size = 13440571, upload-time = "2025-07-07T19:19:06.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/80/a5/3a92893e7399a691bad7664d977cb5e7c81cf666c81f89ea76ba2bff483d/pandas-2.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:ac942bfd0aca577bef61f2bc8da8147c4ef6879965ef883d8e8d5d2dc3e744b8", size = 10987601, upload-time = "2025-07-07T19:19:09.589Z" }, + { url = "https://files.pythonhosted.org/packages/32/ed/ff0a67a2c5505e1854e6715586ac6693dd860fbf52ef9f81edee200266e7/pandas-2.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9026bd4a80108fac2239294a15ef9003c4ee191a0f64b90f170b40cfb7cf2d22", size = 11531393, upload-time = "2025-07-07T19:19:12.245Z" }, + { url = "https://files.pythonhosted.org/packages/c7/db/d8f24a7cc9fb0972adab0cc80b6817e8bef888cfd0024eeb5a21c0bb5c4a/pandas-2.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6de8547d4fdb12421e2d047a2c446c623ff4c11f47fddb6b9169eb98ffba485a", size = 10668750, upload-time = "2025-07-07T19:19:14.612Z" }, + { url = "https://files.pythonhosted.org/packages/0f/b0/80f6ec783313f1e2356b28b4fd8d2148c378370045da918c73145e6aab50/pandas-2.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:782647ddc63c83133b2506912cc6b108140a38a37292102aaa19c81c83db2928", size = 11342004, upload-time = "2025-07-07T19:19:16.857Z" }, + { url = "https://files.pythonhosted.org/packages/e9/e2/20a317688435470872885e7fc8f95109ae9683dec7c50be29b56911515a5/pandas-2.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ba6aff74075311fc88504b1db890187a3cd0f887a5b10f5525f8e2ef55bfdb9", size = 12050869, upload-time = "2025-07-07T19:19:19.265Z" }, + { url = "https://files.pythonhosted.org/packages/55/79/20d746b0a96c67203a5bee5fb4e00ac49c3e8009a39e1f78de264ecc5729/pandas-2.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e5635178b387bd2ba4ac040f82bc2ef6e6b500483975c4ebacd34bec945fda12", size = 12750218, upload-time = "2025-07-07T19:19:21.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/7c/0f/145c8b41e48dbf03dd18fdd7f24f8ba95b8254a97a3379048378f33e7838/pandas-2.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6f3bf5ec947526106399a9e1d26d40ee2b259c66422efdf4de63c848492d91bb", size = 13416763, upload-time = "2025-07-07T19:19:23.939Z" }, + { url = "https://files.pythonhosted.org/packages/b2/c0/54415af59db5cdd86a3d3bf79863e8cc3fa9ed265f0745254061ac09d5f2/pandas-2.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:1c78cf43c8fde236342a1cb2c34bcff89564a7bfed7e474ed2fffa6aed03a956", size = 10987482, upload-time = "2025-07-07T19:19:42.699Z" }, + { url = "https://files.pythonhosted.org/packages/48/64/2fd2e400073a1230e13b8cd604c9bc95d9e3b962e5d44088ead2e8f0cfec/pandas-2.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8dfc17328e8da77be3cf9f47509e5637ba8f137148ed0e9b5241e1baf526e20a", size = 12029159, upload-time = "2025-07-07T19:19:26.362Z" }, + { url = "https://files.pythonhosted.org/packages/d8/0a/d84fd79b0293b7ef88c760d7dca69828d867c89b6d9bc52d6a27e4d87316/pandas-2.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ec6c851509364c59a5344458ab935e6451b31b818be467eb24b0fe89bd05b6b9", size = 11393287, upload-time = "2025-07-07T19:19:29.157Z" }, + { url = "https://files.pythonhosted.org/packages/50/ae/ff885d2b6e88f3c7520bb74ba319268b42f05d7e583b5dded9837da2723f/pandas-2.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:911580460fc4884d9b05254b38a6bfadddfcc6aaef856fb5859e7ca202e45275", size = 11309381, upload-time = "2025-07-07T19:19:31.436Z" }, + { url = "https://files.pythonhosted.org/packages/85/86/1fa345fc17caf5d7780d2699985c03dbe186c68fee00b526813939062bb0/pandas-2.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f4d6feeba91744872a600e6edbbd5b033005b431d5ae8379abee5bcfa479fab", size = 11883998, upload-time = "2025-07-07T19:19:34.267Z" }, + { url = 
"https://files.pythonhosted.org/packages/81/aa/e58541a49b5e6310d89474333e994ee57fea97c8aaa8fc7f00b873059bbf/pandas-2.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fe37e757f462d31a9cd7580236a82f353f5713a80e059a29753cf938c6775d96", size = 12704705, upload-time = "2025-07-07T19:19:36.856Z" }, + { url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044, upload-time = "2025-07-07T19:19:39.999Z" }, ] [[package]] @@ -3345,53 +4122,133 @@ wheels = [ ] [[package]] -name = "pfzy" -version = "0.3.4" +name = "peft" +version = "0.17.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/5a/32b50c077c86bfccc7bed4881c5a2b823518f5450a30e639db5d3711952e/pfzy-0.3.4.tar.gz", hash = "sha256:717ea765dd10b63618e7298b2d98efd819e0b30cd5905c9707223dceeb94b3f1", size = 8396, upload-time = "2022-01-28T02:26:17.946Z" } +dependencies = [ + { name = "accelerate" }, + { name = "huggingface-hub" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "tqdm" }, + { name = "transformers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/70/b8/2e79377efaa1e5f0d70a497db7914ffd355846e760ffa2f7883ab0f600fb/peft-0.17.1.tar.gz", hash = "sha256:e6002b42517976c290b3b8bbb9829a33dd5d470676b2dec7cb4df8501b77eb9f", size = 568192, upload-time = "2025-08-21T09:25:22.703Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/8c/d7/8ff98376b1acc4503253b685ea09981697385ce344d4e3935c2af49e044d/pfzy-0.3.4-py3-none-any.whl", hash = "sha256:5f50d5b2b3207fa72e7ec0ef08372ef652685470974a107d0d4999fc5a903a96", size = 8537, upload-time = "2022-01-28T02:26:16.047Z" }, + { url = "https://files.pythonhosted.org/packages/49/fe/a2da1627aa9cb6310b6034598363bd26ac301c4a99d21f415b1b2855891e/peft-0.17.1-py3-none-any.whl", hash = "sha256:3d129d64def3d74779c32a080d2567e5f7b674e77d546e3585138216d903f99e", size = 504896, upload-time = "2025-08-21T09:25:18.974Z" }, +] + +[[package]] +name = "penguin" +source = { editable = "3rdparty/Penguin-workspace" } +dependencies = [ + { name = "aiohttp" }, + { name = "devtools" }, + { name = "fastapi" }, + { name = "gradio" }, + { name = "hydra-core" }, + { name = "mlflow" }, + { name = "omegaconf" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "ray", extra = ["default"] }, + { name = "tdigest" }, + { name = "tqdm" }, + { name = "uvicorn" }, + { name = "uvloop" }, + { name = "yappi" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp" }, + { name = "devtools" }, + { name = "fastapi" }, + { name = "gradio" }, + { name = "hydra-core" }, + { name = "mlflow" }, + { name = "omegaconf" }, + { name = "openai", specifier = "<=2.6.1" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "ray", extras = ["default"] }, + { name = "tdigest", specifier = ">=0.5.2.2" }, + { name = "tqdm" }, + { name = "uvicorn" }, + { name = "uvloop" }, + { name = "yappi" }, ] [[package]] name = "pillow" -version = "11.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/af/cb/bb5c01fcd2a69335b86c22142b2bccfc3464087efb7fd382eee5ffc7fdf7/pillow-11.2.1.tar.gz", hash = "sha256:a64dd61998416367b7ef979b73d3a85853ba9bec4c2925f74e588879a58716b6", size = 47026707, upload-time = "2025-04-12T17:50:03.289Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/c7/40/052610b15a1b8961f52537cc8326ca6a881408bc2bdad0d852edeb6ed33b/pillow-11.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78afba22027b4accef10dbd5eed84425930ba41b3ea0a86fa8d20baaf19d807f", size = 3190185, upload-time = "2025-04-12T17:48:00.417Z" }, - { url = "https://files.pythonhosted.org/packages/e5/7e/b86dbd35a5f938632093dc40d1682874c33dcfe832558fc80ca56bfcb774/pillow-11.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:78092232a4ab376a35d68c4e6d5e00dfd73454bd12b230420025fbe178ee3b0b", size = 3030306, upload-time = "2025-04-12T17:48:02.391Z" }, - { url = "https://files.pythonhosted.org/packages/a4/5c/467a161f9ed53e5eab51a42923c33051bf8d1a2af4626ac04f5166e58e0c/pillow-11.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a5f306095c6780c52e6bbb6109624b95c5b18e40aab1c3041da3e9e0cd3e2d", size = 4416121, upload-time = "2025-04-12T17:48:04.554Z" }, - { url = "https://files.pythonhosted.org/packages/62/73/972b7742e38ae0e2ac76ab137ca6005dcf877480da0d9d61d93b613065b4/pillow-11.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c7b29dbd4281923a2bfe562acb734cee96bbb129e96e6972d315ed9f232bef4", size = 4501707, upload-time = "2025-04-12T17:48:06.831Z" }, - { url = "https://files.pythonhosted.org/packages/e4/3a/427e4cb0b9e177efbc1a84798ed20498c4f233abde003c06d2650a6d60cb/pillow-11.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3e645b020f3209a0181a418bffe7b4a93171eef6c4ef6cc20980b30bebf17b7d", size = 4522921, upload-time = "2025-04-12T17:48:09.229Z" }, - { url = "https://files.pythonhosted.org/packages/fe/7c/d8b1330458e4d2f3f45d9508796d7caf0c0d3764c00c823d10f6f1a3b76d/pillow-11.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b2dbea1012ccb784a65349f57bbc93730b96e85b42e9bf7b01ef40443db720b4", size = 4612523, upload-time = "2025-04-12T17:48:11.631Z" }, - { url = 
"https://files.pythonhosted.org/packages/b3/2f/65738384e0b1acf451de5a573d8153fe84103772d139e1e0bdf1596be2ea/pillow-11.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:da3104c57bbd72948d75f6a9389e6727d2ab6333c3617f0a89d72d4940aa0443", size = 4587836, upload-time = "2025-04-12T17:48:13.592Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c5/e795c9f2ddf3debb2dedd0df889f2fe4b053308bb59a3cc02a0cd144d641/pillow-11.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:598174aef4589af795f66f9caab87ba4ff860ce08cd5bb447c6fc553ffee603c", size = 4669390, upload-time = "2025-04-12T17:48:15.938Z" }, - { url = "https://files.pythonhosted.org/packages/96/ae/ca0099a3995976a9fce2f423166f7bff9b12244afdc7520f6ed38911539a/pillow-11.2.1-cp312-cp312-win32.whl", hash = "sha256:1d535df14716e7f8776b9e7fee118576d65572b4aad3ed639be9e4fa88a1cad3", size = 2332309, upload-time = "2025-04-12T17:48:17.885Z" }, - { url = "https://files.pythonhosted.org/packages/7c/18/24bff2ad716257fc03da964c5e8f05d9790a779a8895d6566e493ccf0189/pillow-11.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:14e33b28bf17c7a38eede290f77db7c664e4eb01f7869e37fa98a5aa95978941", size = 2676768, upload-time = "2025-04-12T17:48:19.655Z" }, - { url = "https://files.pythonhosted.org/packages/da/bb/e8d656c9543276517ee40184aaa39dcb41e683bca121022f9323ae11b39d/pillow-11.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:21e1470ac9e5739ff880c211fc3af01e3ae505859392bf65458c224d0bf283eb", size = 2415087, upload-time = "2025-04-12T17:48:21.991Z" }, - { url = "https://files.pythonhosted.org/packages/36/9c/447528ee3776e7ab8897fe33697a7ff3f0475bb490c5ac1456a03dc57956/pillow-11.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:fdec757fea0b793056419bca3e9932eb2b0ceec90ef4813ea4c1e072c389eb28", size = 3190098, upload-time = "2025-04-12T17:48:23.915Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/29d5cd052f7566a63e5b506fac9c60526e9ecc553825551333e1e18a4858/pillow-11.2.1-cp313-cp313-macosx_11_0_arm64.whl", 
hash = "sha256:b0e130705d568e2f43a17bcbe74d90958e8a16263868a12c3e0d9c8162690830", size = 3030166, upload-time = "2025-04-12T17:48:25.738Z" }, - { url = "https://files.pythonhosted.org/packages/71/5d/446ee132ad35e7600652133f9c2840b4799bbd8e4adba881284860da0a36/pillow-11.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bdb5e09068332578214cadd9c05e3d64d99e0e87591be22a324bdbc18925be0", size = 4408674, upload-time = "2025-04-12T17:48:27.908Z" }, - { url = "https://files.pythonhosted.org/packages/69/5f/cbe509c0ddf91cc3a03bbacf40e5c2339c4912d16458fcb797bb47bcb269/pillow-11.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d189ba1bebfbc0c0e529159631ec72bb9e9bc041f01ec6d3233d6d82eb823bc1", size = 4496005, upload-time = "2025-04-12T17:48:29.888Z" }, - { url = "https://files.pythonhosted.org/packages/f9/b3/dd4338d8fb8a5f312021f2977fb8198a1184893f9b00b02b75d565c33b51/pillow-11.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:191955c55d8a712fab8934a42bfefbf99dd0b5875078240943f913bb66d46d9f", size = 4518707, upload-time = "2025-04-12T17:48:31.874Z" }, - { url = "https://files.pythonhosted.org/packages/13/eb/2552ecebc0b887f539111c2cd241f538b8ff5891b8903dfe672e997529be/pillow-11.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ad275964d52e2243430472fc5d2c2334b4fc3ff9c16cb0a19254e25efa03a155", size = 4610008, upload-time = "2025-04-12T17:48:34.422Z" }, - { url = "https://files.pythonhosted.org/packages/72/d1/924ce51bea494cb6e7959522d69d7b1c7e74f6821d84c63c3dc430cbbf3b/pillow-11.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:750f96efe0597382660d8b53e90dd1dd44568a8edb51cb7f9d5d918b80d4de14", size = 4585420, upload-time = "2025-04-12T17:48:37.641Z" }, - { url = "https://files.pythonhosted.org/packages/43/ab/8f81312d255d713b99ca37479a4cb4b0f48195e530cdc1611990eb8fd04b/pillow-11.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:fe15238d3798788d00716637b3d4e7bb6bde18b26e5d08335a96e88564a36b6b", size = 4667655, upload-time = "2025-04-12T17:48:39.652Z" }, - { url = "https://files.pythonhosted.org/packages/94/86/8f2e9d2dc3d308dfd137a07fe1cc478df0a23d42a6c4093b087e738e4827/pillow-11.2.1-cp313-cp313-win32.whl", hash = "sha256:3fe735ced9a607fee4f481423a9c36701a39719252a9bb251679635f99d0f7d2", size = 2332329, upload-time = "2025-04-12T17:48:41.765Z" }, - { url = "https://files.pythonhosted.org/packages/6d/ec/1179083b8d6067a613e4d595359b5fdea65d0a3b7ad623fee906e1b3c4d2/pillow-11.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:74ee3d7ecb3f3c05459ba95eed5efa28d6092d751ce9bf20e3e253a4e497e691", size = 2676388, upload-time = "2025-04-12T17:48:43.625Z" }, - { url = "https://files.pythonhosted.org/packages/23/f1/2fc1e1e294de897df39fa8622d829b8828ddad938b0eaea256d65b84dd72/pillow-11.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:5119225c622403afb4b44bad4c1ca6c1f98eed79db8d3bc6e4e160fc6339d66c", size = 2414950, upload-time = "2025-04-12T17:48:45.475Z" }, - { url = "https://files.pythonhosted.org/packages/c4/3e/c328c48b3f0ead7bab765a84b4977acb29f101d10e4ef57a5e3400447c03/pillow-11.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:8ce2e8411c7aaef53e6bb29fe98f28cd4fbd9a1d9be2eeea434331aac0536b22", size = 3192759, upload-time = "2025-04-12T17:48:47.866Z" }, - { url = "https://files.pythonhosted.org/packages/18/0e/1c68532d833fc8b9f404d3a642991441d9058eccd5606eab31617f29b6d4/pillow-11.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9ee66787e095127116d91dea2143db65c7bb1e232f617aa5957c0d9d2a3f23a7", size = 3033284, upload-time = "2025-04-12T17:48:50.189Z" }, - { url = "https://files.pythonhosted.org/packages/b7/cb/6faf3fb1e7705fd2db74e070f3bf6f88693601b0ed8e81049a8266de4754/pillow-11.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9622e3b6c1d8b551b6e6f21873bdcc55762b4b2126633014cea1803368a9aa16", size = 4445826, upload-time = "2025-04-12T17:48:52.346Z" }, 
- { url = "https://files.pythonhosted.org/packages/07/94/8be03d50b70ca47fb434a358919d6a8d6580f282bbb7af7e4aa40103461d/pillow-11.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63b5dff3a68f371ea06025a1a6966c9a1e1ee452fc8020c2cd0ea41b83e9037b", size = 4527329, upload-time = "2025-04-12T17:48:54.403Z" }, - { url = "https://files.pythonhosted.org/packages/fd/a4/bfe78777076dc405e3bd2080bc32da5ab3945b5a25dc5d8acaa9de64a162/pillow-11.2.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:31df6e2d3d8fc99f993fd253e97fae451a8db2e7207acf97859732273e108406", size = 4549049, upload-time = "2025-04-12T17:48:56.383Z" }, - { url = "https://files.pythonhosted.org/packages/65/4d/eaf9068dc687c24979e977ce5677e253624bd8b616b286f543f0c1b91662/pillow-11.2.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:062b7a42d672c45a70fa1f8b43d1d38ff76b63421cbbe7f88146b39e8a558d91", size = 4635408, upload-time = "2025-04-12T17:48:58.782Z" }, - { url = "https://files.pythonhosted.org/packages/1d/26/0fd443365d9c63bc79feb219f97d935cd4b93af28353cba78d8e77b61719/pillow-11.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4eb92eca2711ef8be42fd3f67533765d9fd043b8c80db204f16c8ea62ee1a751", size = 4614863, upload-time = "2025-04-12T17:49:00.709Z" }, - { url = "https://files.pythonhosted.org/packages/49/65/dca4d2506be482c2c6641cacdba5c602bc76d8ceb618fd37de855653a419/pillow-11.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f91ebf30830a48c825590aede79376cb40f110b387c17ee9bd59932c961044f9", size = 4692938, upload-time = "2025-04-12T17:49:02.946Z" }, - { url = "https://files.pythonhosted.org/packages/b3/92/1ca0c3f09233bd7decf8f7105a1c4e3162fb9142128c74adad0fb361b7eb/pillow-11.2.1-cp313-cp313t-win32.whl", hash = "sha256:e0b55f27f584ed623221cfe995c912c61606be8513bfa0e07d2c674b4516d9dd", size = 2335774, upload-time = "2025-04-12T17:49:04.889Z" }, - { url = 
"https://files.pythonhosted.org/packages/a5/ac/77525347cb43b83ae905ffe257bbe2cc6fd23acb9796639a1f56aa59d191/pillow-11.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:36d6b82164c39ce5482f649b437382c0fb2395eabc1e2b1702a6deb8ad647d6e", size = 2681895, upload-time = "2025-04-12T17:49:06.635Z" }, - { url = "https://files.pythonhosted.org/packages/67/32/32dc030cfa91ca0fc52baebbba2e009bb001122a1daa8b6a79ad830b38d3/pillow-11.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:225c832a13326e34f212d2072982bb1adb210e0cc0b153e688743018c94a2681", size = 2417234, upload-time = "2025-04-12T17:49:08.399Z" }, +version = "11.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069, upload-time = "2025-07-01T09:16:30.666Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload-time = "2025-07-01T09:14:17.648Z" }, + { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload-time = "2025-07-01T09:14:19.828Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload-time = "2025-07-03T13:10:04.448Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload-time = "2025-07-03T13:10:10.391Z" }, + { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload-time = "2025-07-01T09:14:21.63Z" }, + { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload-time = "2025-07-01T09:14:23.321Z" }, + { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload-time = "2025-07-01T09:14:25.237Z" }, + { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358, upload-time = "2025-07-01T09:14:27.053Z" }, + { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079, upload-time = "2025-07-01T09:14:30.104Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324, upload-time = "2025-07-01T09:14:31.899Z" }, + { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067, upload-time = "2025-07-01T09:14:33.709Z" }, + { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328, upload-time = "2025-07-01T09:14:35.276Z" }, + { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8", size = 2170652, upload-time = "2025-07-01T09:14:37.203Z" }, + { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443, upload-time = "2025-07-01T09:14:39.344Z" }, + { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474, upload-time = "2025-07-01T09:14:41.843Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038, upload-time = "2025-07-01T09:14:44.008Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407, upload-time = "2025-07-03T13:10:15.628Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094, upload-time = "2025-07-03T13:10:21.857Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503, upload-time = "2025-07-01T09:14:45.698Z" }, + { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574, upload-time = "2025-07-01T09:14:47.415Z" }, + { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060, upload-time = "2025-07-01T09:14:49.636Z" }, + { url = 
"https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b", size = 6721407, upload-time = "2025-07-01T09:14:51.962Z" }, + { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3", size = 6273841, upload-time = "2025-07-01T09:14:54.142Z" }, + { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51", size = 6978450, upload-time = "2025-07-01T09:14:56.436Z" }, + { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055, upload-time = "2025-07-01T09:14:58.072Z" }, + { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110, upload-time = "2025-07-01T09:14:59.79Z" }, + { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547, upload-time = "2025-07-01T09:15:01.648Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554, upload-time = "2025-07-03T13:10:27.018Z" }, + { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132, upload-time = "2025-07-03T13:10:33.01Z" }, + { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001, upload-time = "2025-07-01T09:15:03.365Z" }, + { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814, upload-time = "2025-07-01T09:15:05.655Z" }, + { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124, upload-time = "2025-07-01T09:15:07.358Z" }, + { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c", size = 6747186, upload-time = "2025-07-01T09:15:09.317Z" }, + { url = 
"https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788", size = 6277546, upload-time = "2025-07-01T09:15:11.311Z" }, + { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31", size = 6985102, upload-time = "2025-07-01T09:15:13.164Z" }, + { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload-time = "2025-07-01T09:15:15.695Z" }, + { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12", size = 5278520, upload-time = "2025-07-01T09:15:17.429Z" }, + { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a", size = 4686116, upload-time = "2025-07-01T09:15:19.423Z" }, + { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632", size = 5864597, upload-time = "2025-07-03T13:10:38.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673", size = 7638246, upload-time = "2025-07-03T13:10:44.987Z" }, + { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027", size = 5973336, upload-time = "2025-07-01T09:15:21.237Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77", size = 6642699, upload-time = "2025-07-01T09:15:23.186Z" }, + { url = "https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874", size = 6083789, upload-time = "2025-07-01T09:15:25.1Z" }, + { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a", size = 6720386, upload-time = "2025-07-01T09:15:27.378Z" }, + { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214", size = 6370911, upload-time = "2025-07-01T09:15:29.294Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635", size = 7117383, upload-time = "2025-07-01T09:15:31.128Z" }, + { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6", size = 2511385, upload-time = "2025-07-01T09:15:33.328Z" }, + { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae", size = 5281129, upload-time = "2025-07-01T09:15:35.194Z" }, + { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653", size = 4689580, upload-time = "2025-07-01T09:15:37.114Z" }, + { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6", size = 5902860, upload-time = "2025-07-03T13:10:50.248Z" }, + { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36", size = 7670694, upload-time = "2025-07-03T13:10:56.432Z" }, + { url = 
"https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b", size = 6005888, upload-time = "2025-07-01T09:15:39.436Z" }, + { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477", size = 6670330, upload-time = "2025-07-01T09:15:41.269Z" }, + { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50", size = 6114089, upload-time = "2025-07-01T09:15:43.13Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b", size = 6748206, upload-time = "2025-07-01T09:15:44.937Z" }, + { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370, upload-time = "2025-07-01T09:15:46.673Z" }, + { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500, upload-time = "2025-07-01T09:15:48.512Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835, upload-time = "2025-07-01T09:15:50.399Z" }, ] [[package]] @@ -3405,15 +4262,15 @@ wheels = [ [[package]] name = "plotly" -version = "6.1.2" +version = "6.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "narwhals" }, { name = "packaging" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ae/77/431447616eda6a432dc3ce541b3f808ecb8803ea3d4ab2573b67f8eb4208/plotly-6.1.2.tar.gz", hash = "sha256:4fdaa228926ba3e3a213f4d1713287e69dcad1a7e66cf2025bd7d7026d5014b4", size = 7662971, upload-time = "2025-05-27T20:21:52.56Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a0/64/850de5076f4436410e1ce4f6a69f4313ef6215dfea155f3f6559335cad29/plotly-6.3.0.tar.gz", hash = "sha256:8840a184d18ccae0f9189c2b9a2943923fd5cae7717b723f36eef78f444e5a73", size = 6923926, upload-time = "2025-08-12T20:22:14.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/6f/759d5da0517547a5d38aabf05d04d9f8adf83391d2c7fc33f904417d3ba2/plotly-6.1.2-py3-none-any.whl", hash = "sha256:f1548a8ed9158d59e03d7fed548c7db5549f3130d9ae19293c8638c202648f6d", size = 16265530, upload-time = "2025-05-27T20:21:46.6Z" }, + { url = "https://files.pythonhosted.org/packages/95/a9/12e2dc726ba1ba775a2c6922d5d5b4488ad60bdab0888c337c194c8e6de8/plotly-6.3.0-py3-none-any.whl", hash = "sha256:7ad806edce9d3cdd882eaebaf97c0c9e252043ed1ed3d382c3e3520ec07806d4", size = 9791257, upload-time = "2025-08-12T20:22:09.205Z" }, ] [[package]] @@ -3427,7 +4284,7 @@ wheels = [ [[package]] name = "pre-commit" -version = "3.6.0" +version = "4.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cfgv" }, @@ -3436,9 +4293,21 @@ dependencies = [ { name = "pyyaml" }, { name = "virtualenv" }, ] -sdist = { url 
= "https://files.pythonhosted.org/packages/88/e8/4330d06f2b00ad3a9c66e07a68fe23f70233a4e7e1aaba5a738a93d2cb5d/pre_commit-3.6.0.tar.gz", hash = "sha256:d30bad9abf165f7785c15a21a1f46da7d0677cb00ee7ff4c579fd38922efe15d", size = 177069, upload-time = "2023-12-09T21:25:31.535Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/29/7cf5bbc236333876e4b41f56e06857a87937ce4bf91e117a6991a2dbb02a/pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16", size = 193792, upload-time = "2025-08-09T18:56:14.651Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5b/a5/987a405322d78a73b66e39e4a90e4ef156fd7141bf71df987e50717c321b/pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8", size = 220965, upload-time = "2025-08-09T18:56:13.192Z" }, +] + +[[package]] +name = "prettytable" +version = "3.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/b1/85e18ac92afd08c533603e3393977b6bc1443043115a47bb094f3b98f94f/prettytable-3.16.0.tar.gz", hash = "sha256:3c64b31719d961bf69c9a7e03d0c1e477320906a98da63952bc6698d6164ff57", size = 66276, upload-time = "2025-03-24T19:39:04.008Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/e3/54cd906d377e1766299df14710ded125e195d5c685c8f1bafecec073e9c6/pre_commit-3.6.0-py2.py3-none-any.whl", hash = "sha256:c255039ef399049a5544b6ce13d135caba8f2c28c3b4033277a788f434308376", size = 204021, upload-time = "2023-12-09T21:25:28.932Z" }, + { url = "https://files.pythonhosted.org/packages/02/c7/5613524e606ea1688b3bdbf48aa64bafb6d0a4ac3750274c43b6158a390f/prettytable-3.16.0-py3-none-any.whl", hash = "sha256:b5eccfabb82222f5aa46b798ff02a8452cf530a352c31bddfa29be41242863aa", size = 33863, upload-time = "2025-03-24T19:39:02.359Z" }, ] [[package]] @@ -3463,18 +4332,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/27/72/0824c18f3bc75810f55dacc2dd933f6ec829771180245ae3cc976195dec0/prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl", hash = "sha256:978130f3c0bb7b8ebcc90d35516a6fe13e02d2eb358c8f83887cdef7020c31e9", size = 19296, upload-time = "2025-03-19T19:35:04.323Z" }, ] -[[package]] -name = "prompt-toolkit" -version = "3.0.51" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940, upload-time = "2025-04-15T09:18:47.731Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" }, -] - [[package]] name = "propcache" version = "0.3.2" @@ -3546,16 +4403,16 @@ wheels = [ [[package]] name = "protobuf" -version = "5.29.5" +version = "6.32.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/29/d09e70352e4e88c9c7a198d5645d7277811448d76c23b00345670f7c8a38/protobuf-5.29.5.tar.gz", hash = "sha256:bc1463bafd4b0929216c35f437a8e28731a2b7fe3d98bb77a600efced5a15c84", size = 425226, upload-time = "2025-05-28T23:51:59.82Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/df/fb4a8eeea482eca989b51cffd274aac2ee24e825f0bf3cbce5281fa1567b/protobuf-6.32.0.tar.gz", hash = "sha256:a81439049127067fc49ec1d36e25c6ee1d1a2b7be930675f919258d03c04e7d2", size = 440614, upload-time = "2025-08-14T21:21:25.015Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/5f/11/6e40e9fc5bba02988a214c07cf324595789ca7820160bfd1f8be96e48539/protobuf-5.29.5-cp310-abi3-win32.whl", hash = "sha256:3f1c6468a2cfd102ff4703976138844f78ebd1fb45f49011afc5139e9e283079", size = 422963, upload-time = "2025-05-28T23:51:41.204Z" }, - { url = "https://files.pythonhosted.org/packages/81/7f/73cefb093e1a2a7c3ffd839e6f9fcafb7a427d300c7f8aef9c64405d8ac6/protobuf-5.29.5-cp310-abi3-win_amd64.whl", hash = "sha256:3f76e3a3675b4a4d867b52e4a5f5b78a2ef9565549d4037e06cf7b0942b1d3fc", size = 434818, upload-time = "2025-05-28T23:51:44.297Z" }, - { url = "https://files.pythonhosted.org/packages/dd/73/10e1661c21f139f2c6ad9b23040ff36fee624310dc28fba20d33fdae124c/protobuf-5.29.5-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:e38c5add5a311f2a6eb0340716ef9b039c1dfa428b28f25a7838ac329204a671", size = 418091, upload-time = "2025-05-28T23:51:45.907Z" }, - { url = "https://files.pythonhosted.org/packages/6c/04/98f6f8cf5b07ab1294c13f34b4e69b3722bb609c5b701d6c169828f9f8aa/protobuf-5.29.5-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:fa18533a299d7ab6c55a238bf8629311439995f2e7eca5caaff08663606e9015", size = 319824, upload-time = "2025-05-28T23:51:47.545Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/07c80521879c2d15f321465ac24c70efe2381378c00bf5e56a0f4fbac8cd/protobuf-5.29.5-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:63848923da3325e1bf7e9003d680ce6e14b07e55d0473253a690c3a8b8fd6e61", size = 319942, upload-time = "2025-05-28T23:51:49.11Z" }, - { url = "https://files.pythonhosted.org/packages/7e/cc/7e77861000a0691aeea8f4566e5d3aa716f2b1dece4a24439437e41d3d25/protobuf-5.29.5-py3-none-any.whl", hash = "sha256:6cf42630262c59b2d8de33954443d94b746c952b01434fc58a417fdbd2e84bd5", size = 172823, upload-time = "2025-05-28T23:51:58.157Z" }, + { url = "https://files.pythonhosted.org/packages/33/18/df8c87da2e47f4f1dcc5153a81cd6bca4e429803f4069a299e236e4dd510/protobuf-6.32.0-cp310-abi3-win32.whl", hash = 
"sha256:84f9e3c1ff6fb0308dbacb0950d8aa90694b0d0ee68e75719cb044b7078fe741", size = 424409, upload-time = "2025-08-14T21:21:12.366Z" }, + { url = "https://files.pythonhosted.org/packages/e1/59/0a820b7310f8139bd8d5a9388e6a38e1786d179d6f33998448609296c229/protobuf-6.32.0-cp310-abi3-win_amd64.whl", hash = "sha256:a8bdbb2f009cfc22a36d031f22a625a38b615b5e19e558a7b756b3279723e68e", size = 435735, upload-time = "2025-08-14T21:21:15.046Z" }, + { url = "https://files.pythonhosted.org/packages/cc/5b/0d421533c59c789e9c9894683efac582c06246bf24bb26b753b149bd88e4/protobuf-6.32.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d52691e5bee6c860fff9a1c86ad26a13afbeb4b168cd4445c922b7e2cf85aaf0", size = 426449, upload-time = "2025-08-14T21:21:16.687Z" }, + { url = "https://files.pythonhosted.org/packages/ec/7b/607764ebe6c7a23dcee06e054fd1de3d5841b7648a90fd6def9a3bb58c5e/protobuf-6.32.0-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:501fe6372fd1c8ea2a30b4d9be8f87955a64d6be9c88a973996cef5ef6f0abf1", size = 322869, upload-time = "2025-08-14T21:21:18.282Z" }, + { url = "https://files.pythonhosted.org/packages/40/01/2e730bd1c25392fc32e3268e02446f0d77cb51a2c3a8486b1798e34d5805/protobuf-6.32.0-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:75a2aab2bd1aeb1f5dc7c5f33bcb11d82ea8c055c9becbb41c26a8c43fd7092c", size = 322009, upload-time = "2025-08-14T21:21:19.893Z" }, + { url = "https://files.pythonhosted.org/packages/9c/f2/80ffc4677aac1bc3519b26bc7f7f5de7fce0ee2f7e36e59e27d8beb32dd1/protobuf-6.32.0-py3-none-any.whl", hash = "sha256:ba377e5b67b908c8f3072a57b63e2c6a4cbd18aea4ed98d2584350dbf46f2783", size = 169287, upload-time = "2025-08-14T21:21:23.515Z" }, ] [[package]] @@ -3575,11 +4432,11 @@ wheels = [ [[package]] name = "pulp" -version = "3.2.1" +version = "3.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/2f/cd/cb1308632ad5b092ebbfe64d0cd0b9906caec6e52bff88f54ddd3d434694/pulp-3.2.1.tar.gz", hash = 
"sha256:fc6c02c47c06342c586b175924add753cad7638ff6149b3b43e87ac6709ac469", size = 16297436, upload-time = "2025-05-29T09:25:51.647Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/4f/11cfa283228b5f259bcfc913f731f7c6f68748d26711594e14cf2cb5e39a/pulp-3.2.2.tar.gz", hash = "sha256:389a6ff1dc34ec4b093f34f7a9fa3553743ff0ea99b2a423e9f0dd16940f63d2", size = 16299367, upload-time = "2025-07-29T11:42:04.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/45/2bb878df73b5545405faff0b0b30f72929222356387a41b50ca268951d5d/pulp-3.2.1-py3-none-any.whl", hash = "sha256:c6cf7fe84cef15795bc7c27e2f3c6784db5cf6ebf68e94d5a659b02415f982c5", size = 16383592, upload-time = "2025-05-29T09:25:49.262Z" }, + { url = "https://files.pythonhosted.org/packages/15/8d/a6a9d58c929a869f7f1b99b3d37b3f14ef63e2826eef581416338d686c3f/pulp-3.2.2-py3-none-any.whl", hash = "sha256:d3ca5ff11a28b3e7b2508a992d7e51f3533471d89305f0560b5fe3b6cc821043", size = 16385354, upload-time = "2025-07-29T11:42:01.829Z" }, ] [[package]] @@ -3593,52 +4450,46 @@ wheels = [ [[package]] name = "py-spy" -version = "0.4.0" +version = "0.4.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/cd/9dacc04604dc4398ce5bed77ed59918ad0940f15165954d4aaa651cc640c/py_spy-0.4.0.tar.gz", hash = "sha256:806602ce7972782cc9c1e383f339bfc27bfb822d42485e6a3e0530ae5040e1f0", size = 253236, upload-time = "2024-11-01T19:08:51.487Z" } +sdist = { url = "https://files.pythonhosted.org/packages/19/e2/ff811a367028b87e86714945bb9ecb5c1cc69114a8039a67b3a862cef921/py_spy-0.4.1.tar.gz", hash = "sha256:e53aa53daa2e47c2eef97dd2455b47bb3a7e7f962796a86cc3e7dbde8e6f4db4", size = 244726, upload-time = "2025-07-31T19:33:25.172Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/7e/02ca3ee68507db47afce769504060d71b4dc1455f0f9faa8d32fc7762221/py_spy-0.4.0-py2.py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = 
"sha256:f2cf3f7130e7d780471faa5957441d3b4e0ec39a79b2c00f4c33d494f7728428", size = 3617847, upload-time = "2024-11-01T19:08:37.44Z" }, - { url = "https://files.pythonhosted.org/packages/65/7c/d9e26cc4c8e91f96a3a65de04d2e2e4131fbcaf6830d10917d4fab9d6788/py_spy-0.4.0-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:47cdda4c34d9b6cb01f3aaeceb2e88faf57da880207fe72ff6ff97e9bb6cc8a9", size = 1761955, upload-time = "2024-11-01T19:08:39.632Z" }, - { url = "https://files.pythonhosted.org/packages/d2/e4/8fbfd219b7f282b80e6b2e74c9197850d2c51db8555705567bb65507b060/py_spy-0.4.0-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eee3d0bde85ca5cf4f01f012d461180ca76c24835a96f7b5c4ded64eb6a008ab", size = 2059471, upload-time = "2024-11-01T19:08:41.818Z" }, - { url = "https://files.pythonhosted.org/packages/a7/1d/79a94a5ace810c13b730ce96765ca465c171b4952034f1be7402d8accbc1/py_spy-0.4.0-py2.py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c5f06ffce4c9c98b7fc9f5e67e5e7db591173f1351837633f3f23d9378b1d18a", size = 2067486, upload-time = "2024-11-01T19:08:43.673Z" }, - { url = "https://files.pythonhosted.org/packages/6d/90/fbbb038f826a83ed15ebc4ae606815d6cad6c5c6399c86c7ab96f6c60817/py_spy-0.4.0-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:87573e64dbfdfc89ba2e0f5e2f525aa84e0299c7eb6454b47ea335fde583a7a0", size = 2141433, upload-time = "2024-11-01T19:08:45.988Z" }, - { url = "https://files.pythonhosted.org/packages/c9/c1/5e012669ebb687e546dc99fcfc4861ebfcf3a337b7a41af945df23140bb5/py_spy-0.4.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8bf2f3702cef367a489faa45177b41a6c31b2a3e5bd78c978d44e29340152f5a", size = 2732951, upload-time = "2024-11-01T19:08:48.109Z" }, - { url = "https://files.pythonhosted.org/packages/74/8b/dd8490660019a6b0be28d9ffd2bf1db967604b19f3f2719c0e283a16ac7f/py_spy-0.4.0-py2.py3-none-win_amd64.whl", hash = 
"sha256:77d8f637ade38367d944874776f45b703b7ac5938b1f7be8891f3a5876ddbb96", size = 1810770, upload-time = "2024-11-01T19:08:50.229Z" }, + { url = "https://files.pythonhosted.org/packages/14/e3/3a32500d845bdd94f6a2b4ed6244982f42ec2bc64602ea8fcfe900678ae7/py_spy-0.4.1-py2.py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:809094208c6256c8f4ccadd31e9a513fe2429253f48e20066879239ba12cd8cc", size = 3682508, upload-time = "2025-07-31T19:33:13.753Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bf/e4d280e9e0bec71d39fc646654097027d4bbe8e04af18fb68e49afcff404/py_spy-0.4.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:1fb8bf71ab8df95a95cc387deed6552934c50feef2cf6456bc06692a5508fd0c", size = 1796395, upload-time = "2025-07-31T19:33:15.325Z" }, + { url = "https://files.pythonhosted.org/packages/df/79/9ed50bb0a9de63ed023aa2db8b6265b04a7760d98c61eb54def6a5fddb68/py_spy-0.4.1-py2.py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee776b9d512a011d1ad3907ed53ae32ce2f3d9ff3e1782236554e22103b5c084", size = 2034938, upload-time = "2025-07-31T19:33:17.194Z" }, + { url = "https://files.pythonhosted.org/packages/53/a5/36862e3eea59f729dfb70ee6f9e14b051d8ddce1aa7e70e0b81d9fe18536/py_spy-0.4.1-py2.py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:532d3525538254d1859b49de1fbe9744df6b8865657c9f0e444bf36ce3f19226", size = 2658968, upload-time = "2025-07-31T19:33:18.916Z" }, + { url = "https://files.pythonhosted.org/packages/08/f8/9ea0b586b065a623f591e5e7961282ec944b5fbbdca33186c7c0296645b3/py_spy-0.4.1-py2.py3-none-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4972c21890b6814017e39ac233c22572c4a61fd874524ebc5ccab0f2237aee0a", size = 2147541, upload-time = "2025-07-31T19:33:20.565Z" }, + { url = "https://files.pythonhosted.org/packages/68/fb/bc7f639aed026bca6e7beb1e33f6951e16b7d315594e7635a4f7d21d63f4/py_spy-0.4.1-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = 
"sha256:6a80ec05eb8a6883863a367c6a4d4f2d57de68466f7956b6367d4edd5c61bb29", size = 2763338, upload-time = "2025-07-31T19:33:22.202Z" }, + { url = "https://files.pythonhosted.org/packages/e1/da/fcc9a9fcd4ca946ff402cff20348e838b051d69f50f5d1f5dca4cd3c5eb8/py_spy-0.4.1-py2.py3-none-win_amd64.whl", hash = "sha256:d92e522bd40e9bf7d87c204033ce5bb5c828fca45fa28d970f58d71128069fdc", size = 1818784, upload-time = "2025-07-31T19:33:23.802Z" }, ] [[package]] name = "pyarrow" -version = "20.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/ee/a7810cb9f3d6e9238e61d312076a9859bf3668fd21c69744de9532383912/pyarrow-20.0.0.tar.gz", hash = "sha256:febc4a913592573c8d5805091a6c2b5064c8bd6e002131f01061797d91c783c1", size = 1125187, upload-time = "2025-04-27T12:34:23.264Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/d6/0c10e0d54f6c13eb464ee9b67a68b8c71bcf2f67760ef5b6fbcddd2ab05f/pyarrow-20.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:75a51a5b0eef32727a247707d4755322cb970be7e935172b6a3a9f9ae98404ba", size = 30815067, upload-time = "2025-04-27T12:29:44.384Z" }, - { url = "https://files.pythonhosted.org/packages/7e/e2/04e9874abe4094a06fd8b0cbb0f1312d8dd7d707f144c2ec1e5e8f452ffa/pyarrow-20.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:211d5e84cecc640c7a3ab900f930aaff5cd2702177e0d562d426fb7c4f737781", size = 32297128, upload-time = "2025-04-27T12:29:52.038Z" }, - { url = "https://files.pythonhosted.org/packages/31/fd/c565e5dcc906a3b471a83273039cb75cb79aad4a2d4a12f76cc5ae90a4b8/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ba3cf4182828be7a896cbd232aa8dd6a31bd1f9e32776cc3796c012855e1199", size = 41334890, upload-time = "2025-04-27T12:29:59.452Z" }, - { url = "https://files.pythonhosted.org/packages/af/a9/3bdd799e2c9b20c1ea6dc6fa8e83f29480a97711cf806e823f808c2316ac/pyarrow-20.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:2c3a01f313ffe27ac4126f4c2e5ea0f36a5fc6ab51f8726cf41fee4b256680bd", size = 42421775, upload-time = "2025-04-27T12:30:06.875Z" }, - { url = "https://files.pythonhosted.org/packages/10/f7/da98ccd86354c332f593218101ae56568d5dcedb460e342000bd89c49cc1/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:a2791f69ad72addd33510fec7bb14ee06c2a448e06b649e264c094c5b5f7ce28", size = 40687231, upload-time = "2025-04-27T12:30:13.954Z" }, - { url = "https://files.pythonhosted.org/packages/bb/1b/2168d6050e52ff1e6cefc61d600723870bf569cbf41d13db939c8cf97a16/pyarrow-20.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:4250e28a22302ce8692d3a0e8ec9d9dde54ec00d237cff4dfa9c1fbf79e472a8", size = 42295639, upload-time = "2025-04-27T12:30:21.949Z" }, - { url = "https://files.pythonhosted.org/packages/b2/66/2d976c0c7158fd25591c8ca55aee026e6d5745a021915a1835578707feb3/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:89e030dc58fc760e4010148e6ff164d2f44441490280ef1e97a542375e41058e", size = 42908549, upload-time = "2025-04-27T12:30:29.551Z" }, - { url = "https://files.pythonhosted.org/packages/31/a9/dfb999c2fc6911201dcbf348247f9cc382a8990f9ab45c12eabfd7243a38/pyarrow-20.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6102b4864d77102dbbb72965618e204e550135a940c2534711d5ffa787df2a5a", size = 44557216, upload-time = "2025-04-27T12:30:36.977Z" }, - { url = "https://files.pythonhosted.org/packages/a0/8e/9adee63dfa3911be2382fb4d92e4b2e7d82610f9d9f668493bebaa2af50f/pyarrow-20.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:96d6a0a37d9c98be08f5ed6a10831d88d52cac7b13f5287f1e0f625a0de8062b", size = 25660496, upload-time = "2025-04-27T12:30:42.809Z" }, - { url = "https://files.pythonhosted.org/packages/9b/aa/daa413b81446d20d4dad2944110dcf4cf4f4179ef7f685dd5a6d7570dc8e/pyarrow-20.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a15532e77b94c61efadde86d10957950392999503b3616b2ffcef7621a002893", size = 30798501, upload-time = 
"2025-04-27T12:30:48.351Z" }, - { url = "https://files.pythonhosted.org/packages/ff/75/2303d1caa410925de902d32ac215dc80a7ce7dd8dfe95358c165f2adf107/pyarrow-20.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:dd43f58037443af715f34f1322c782ec463a3c8a94a85fdb2d987ceb5658e061", size = 32277895, upload-time = "2025-04-27T12:30:55.238Z" }, - { url = "https://files.pythonhosted.org/packages/92/41/fe18c7c0b38b20811b73d1bdd54b1fccba0dab0e51d2048878042d84afa8/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa0d288143a8585806e3cc7c39566407aab646fb9ece164609dac1cfff45f6ae", size = 41327322, upload-time = "2025-04-27T12:31:05.587Z" }, - { url = "https://files.pythonhosted.org/packages/da/ab/7dbf3d11db67c72dbf36ae63dcbc9f30b866c153b3a22ef728523943eee6/pyarrow-20.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6953f0114f8d6f3d905d98e987d0924dabce59c3cda380bdfaa25a6201563b4", size = 42411441, upload-time = "2025-04-27T12:31:15.675Z" }, - { url = "https://files.pythonhosted.org/packages/90/c3/0c7da7b6dac863af75b64e2f827e4742161128c350bfe7955b426484e226/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:991f85b48a8a5e839b2128590ce07611fae48a904cae6cab1f089c5955b57eb5", size = 40677027, upload-time = "2025-04-27T12:31:24.631Z" }, - { url = "https://files.pythonhosted.org/packages/be/27/43a47fa0ff9053ab5203bb3faeec435d43c0d8bfa40179bfd076cdbd4e1c/pyarrow-20.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:97c8dc984ed09cb07d618d57d8d4b67a5100a30c3818c2fb0b04599f0da2de7b", size = 42281473, upload-time = "2025-04-27T12:31:31.311Z" }, - { url = "https://files.pythonhosted.org/packages/bc/0b/d56c63b078876da81bbb9ba695a596eabee9b085555ed12bf6eb3b7cab0e/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9b71daf534f4745818f96c214dbc1e6124d7daf059167330b610fc69b6f3d3e3", size = 42893897, upload-time = "2025-04-27T12:31:39.406Z" }, - { url = 
"https://files.pythonhosted.org/packages/92/ac/7d4bd020ba9145f354012838692d48300c1b8fe5634bfda886abcada67ed/pyarrow-20.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e8b88758f9303fa5a83d6c90e176714b2fd3852e776fc2d7e42a22dd6c2fb368", size = 44543847, upload-time = "2025-04-27T12:31:45.997Z" }, - { url = "https://files.pythonhosted.org/packages/9d/07/290f4abf9ca702c5df7b47739c1b2c83588641ddfa2cc75e34a301d42e55/pyarrow-20.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:30b3051b7975801c1e1d387e17c588d8ab05ced9b1e14eec57915f79869b5031", size = 25653219, upload-time = "2025-04-27T12:31:54.11Z" }, - { url = "https://files.pythonhosted.org/packages/95/df/720bb17704b10bd69dde086e1400b8eefb8f58df3f8ac9cff6c425bf57f1/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:ca151afa4f9b7bc45bcc791eb9a89e90a9eb2772767d0b1e5389609c7d03db63", size = 30853957, upload-time = "2025-04-27T12:31:59.215Z" }, - { url = "https://files.pythonhosted.org/packages/d9/72/0d5f875efc31baef742ba55a00a25213a19ea64d7176e0fe001c5d8b6e9a/pyarrow-20.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:4680f01ecd86e0dd63e39eb5cd59ef9ff24a9d166db328679e36c108dc993d4c", size = 32247972, upload-time = "2025-04-27T12:32:05.369Z" }, - { url = "https://files.pythonhosted.org/packages/d5/bc/e48b4fa544d2eea72f7844180eb77f83f2030b84c8dad860f199f94307ed/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f4c8534e2ff059765647aa69b75d6543f9fef59e2cd4c6d18015192565d2b70", size = 41256434, upload-time = "2025-04-27T12:32:11.814Z" }, - { url = "https://files.pythonhosted.org/packages/c3/01/974043a29874aa2cf4f87fb07fd108828fc7362300265a2a64a94965e35b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e1f8a47f4b4ae4c69c4d702cfbdfe4d41e18e5c7ef6f1bb1c50918c1e81c57b", size = 42353648, upload-time = "2025-04-27T12:32:20.766Z" }, - { url = 
"https://files.pythonhosted.org/packages/68/95/cc0d3634cde9ca69b0e51cbe830d8915ea32dda2157560dda27ff3b3337b/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:a1f60dc14658efaa927f8214734f6a01a806d7690be4b3232ba526836d216122", size = 40619853, upload-time = "2025-04-27T12:32:28.1Z" }, - { url = "https://files.pythonhosted.org/packages/29/c2/3ad40e07e96a3e74e7ed7cc8285aadfa84eb848a798c98ec0ad009eb6bcc/pyarrow-20.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:204a846dca751428991346976b914d6d2a82ae5b8316a6ed99789ebf976551e6", size = 42241743, upload-time = "2025-04-27T12:32:35.792Z" }, - { url = "https://files.pythonhosted.org/packages/eb/cb/65fa110b483339add6a9bc7b6373614166b14e20375d4daa73483755f830/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f3b117b922af5e4c6b9a9115825726cac7d8b1421c37c2b5e24fbacc8930612c", size = 42839441, upload-time = "2025-04-27T12:32:46.64Z" }, - { url = "https://files.pythonhosted.org/packages/98/7b/f30b1954589243207d7a0fbc9997401044bf9a033eec78f6cb50da3f304a/pyarrow-20.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:e724a3fd23ae5b9c010e7be857f4405ed5e679db5c93e66204db1a69f733936a", size = 44503279, upload-time = "2025-04-27T12:32:56.503Z" }, - { url = "https://files.pythonhosted.org/packages/37/40/ad395740cd641869a13bcf60851296c89624662575621968dcfafabaa7f6/pyarrow-20.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:82f1ee5133bd8f49d31be1299dc07f585136679666b502540db854968576faf9", size = 25944982, upload-time = "2025-04-27T12:33:04.72Z" }, +version = "21.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ef/c2/ea068b8f00905c06329a3dfcd40d0fcc2b7d0f2e355bdb25b65e0a0e4cd4/pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc", size = 1133487, upload-time = "2025-07-18T00:57:31.761Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ca/d4/d4f817b21aacc30195cf6a46ba041dd1be827efa4a623cc8bf39a1c2a0c0/pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd", size = 31160305, upload-time = "2025-07-18T00:55:35.373Z" }, + { url = "https://files.pythonhosted.org/packages/a2/9c/dcd38ce6e4b4d9a19e1d36914cb8e2b1da4e6003dd075474c4cfcdfe0601/pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876", size = 32684264, upload-time = "2025-07-18T00:55:39.303Z" }, + { url = "https://files.pythonhosted.org/packages/4f/74/2a2d9f8d7a59b639523454bec12dba35ae3d0a07d8ab529dc0809f74b23c/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d", size = 41108099, upload-time = "2025-07-18T00:55:42.889Z" }, + { url = "https://files.pythonhosted.org/packages/ad/90/2660332eeb31303c13b653ea566a9918484b6e4d6b9d2d46879a33ab0622/pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e", size = 42829529, upload-time = "2025-07-18T00:55:47.069Z" }, + { url = "https://files.pythonhosted.org/packages/33/27/1a93a25c92717f6aa0fca06eb4700860577d016cd3ae51aad0e0488ac899/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82", size = 43367883, upload-time = "2025-07-18T00:55:53.069Z" }, + { url = "https://files.pythonhosted.org/packages/05/d9/4d09d919f35d599bc05c6950095e358c3e15148ead26292dfca1fb659b0c/pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623", size = 45133802, upload-time = "2025-07-18T00:55:57.714Z" }, + { url = 
"https://files.pythonhosted.org/packages/71/30/f3795b6e192c3ab881325ffe172e526499eb3780e306a15103a2764916a2/pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18", size = 26203175, upload-time = "2025-07-18T00:56:01.364Z" }, + { url = "https://files.pythonhosted.org/packages/16/ca/c7eaa8e62db8fb37ce942b1ea0c6d7abfe3786ca193957afa25e71b81b66/pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a", size = 31154306, upload-time = "2025-07-18T00:56:04.42Z" }, + { url = "https://files.pythonhosted.org/packages/ce/e8/e87d9e3b2489302b3a1aea709aaca4b781c5252fcb812a17ab6275a9a484/pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe", size = 32680622, upload-time = "2025-07-18T00:56:07.505Z" }, + { url = "https://files.pythonhosted.org/packages/84/52/79095d73a742aa0aba370c7942b1b655f598069489ab387fe47261a849e1/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd", size = 41104094, upload-time = "2025-07-18T00:56:10.994Z" }, + { url = "https://files.pythonhosted.org/packages/89/4b/7782438b551dbb0468892a276b8c789b8bbdb25ea5c5eb27faadd753e037/pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61", size = 42825576, upload-time = "2025-07-18T00:56:15.569Z" }, + { url = "https://files.pythonhosted.org/packages/b3/62/0f29de6e0a1e33518dec92c65be0351d32d7ca351e51ec5f4f837a9aab91/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d", size = 43368342, upload-time = "2025-07-18T00:56:19.531Z" }, + { url = 
"https://files.pythonhosted.org/packages/90/c7/0fa1f3f29cf75f339768cc698c8ad4ddd2481c1742e9741459911c9ac477/pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99", size = 45131218, upload-time = "2025-07-18T00:56:23.347Z" }, + { url = "https://files.pythonhosted.org/packages/01/63/581f2076465e67b23bc5a37d4a2abff8362d389d29d8105832e82c9c811c/pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636", size = 26087551, upload-time = "2025-07-18T00:56:26.758Z" }, + { url = "https://files.pythonhosted.org/packages/c9/ab/357d0d9648bb8241ee7348e564f2479d206ebe6e1c47ac5027c2e31ecd39/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da", size = 31290064, upload-time = "2025-07-18T00:56:30.214Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8a/5685d62a990e4cac2043fc76b4661bf38d06efed55cf45a334b455bd2759/pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7", size = 32727837, upload-time = "2025-07-18T00:56:33.935Z" }, + { url = "https://files.pythonhosted.org/packages/fc/de/c0828ee09525c2bafefd3e736a248ebe764d07d0fd762d4f0929dbc516c9/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6", size = 41014158, upload-time = "2025-07-18T00:56:37.528Z" }, + { url = "https://files.pythonhosted.org/packages/6e/26/a2865c420c50b7a3748320b614f3484bfcde8347b2639b2b903b21ce6a72/pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8", size = 42667885, upload-time = "2025-07-18T00:56:41.483Z" }, + { url = 
"https://files.pythonhosted.org/packages/0a/f9/4ee798dc902533159250fb4321267730bc0a107d8c6889e07c3add4fe3a5/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503", size = 43276625, upload-time = "2025-07-18T00:56:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/5a/da/e02544d6997037a4b0d22d8e5f66bc9315c3671371a8b18c79ade1cefe14/pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79", size = 44951890, upload-time = "2025-07-18T00:56:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/e5/4e/519c1bc1876625fe6b71e9a28287c43ec2f20f73c658b9ae1d485c0c206e/pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10", size = 26371006, upload-time = "2025-07-18T00:56:56.379Z" }, ] [[package]] @@ -3662,13 +4513,124 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, ] +[[package]] +name = "pybase64" +version = "1.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/04/14/43297a7b7f0c1bf0c00b596f754ee3ac946128c64d21047ccf9c9bbc5165/pybase64-1.4.2.tar.gz", hash = "sha256:46cdefd283ed9643315d952fe44de80dc9b9a811ce6e3ec97fd1827af97692d0", size = 137246, upload-time = "2025-07-27T13:08:57.808Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/6d/11ede991e800797b9f5ebd528013b34eee5652df93de61ffb24503393fa5/pybase64-1.4.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:db2c75d1388855b5a1015b65096d7dbcc708e7de3245dcbedeb872ec05a09326", size = 38326, upload-time = "2025-07-27T13:03:09.065Z" }, + { url 
= "https://files.pythonhosted.org/packages/fe/84/87f1f565f42e2397e2aaa2477c86419f5173c3699881c42325c090982f0a/pybase64-1.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b621a972a01841368fdb9dedc55fd3c6e0c7217d0505ba3b1ebe95e7ef1b493", size = 31661, upload-time = "2025-07-27T13:03:10.295Z" }, + { url = "https://files.pythonhosted.org/packages/cb/2a/a24c810e7a61d2cc6f73fe9ee4872a03030887fa8654150901b15f376f65/pybase64-1.4.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f48c32ac6a16cbf57a5a96a073fef6ff7e3526f623cd49faa112b7f9980bafba", size = 68192, upload-time = "2025-07-27T13:03:11.467Z" }, + { url = "https://files.pythonhosted.org/packages/ee/87/d9baf98cbfc37b8657290ad4421f3a3c36aa0eafe4872c5859cfb52f3448/pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:ace8b23093a6bb862477080d9059b784096ab2f97541e8bfc40d42f062875149", size = 71587, upload-time = "2025-07-27T13:03:12.719Z" }, + { url = "https://files.pythonhosted.org/packages/0b/89/3df043cc56ef3b91b7aa0c26ae822a2d7ec8da0b0fd7c309c879b0eb5988/pybase64-1.4.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1772c7532a7fb6301baea3dd3e010148dbf70cd1136a83c2f5f91bdc94822145", size = 59910, upload-time = "2025-07-27T13:03:14.266Z" }, + { url = "https://files.pythonhosted.org/packages/75/4f/6641e9edf37aeb4d4524dc7ba2168eff8d96c90e77f6283c2be3400ab380/pybase64-1.4.2-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:f86f7faddcba5cbfea475f8ab96567834c28bf09ca6c7c3d66ee445adac80d8f", size = 56701, upload-time = "2025-07-27T13:03:15.6Z" }, + { url = "https://files.pythonhosted.org/packages/2d/7f/20d8ac1046f12420a0954a45a13033e75f98aade36eecd00c64e3549b071/pybase64-1.4.2-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:0b8c8e275b5294089f314814b4a50174ab90af79d6a4850f6ae11261ff6a7372", size = 59288, 
upload-time = "2025-07-27T13:03:16.823Z" }, + { url = "https://files.pythonhosted.org/packages/17/ea/9c0ca570e3e50b3c6c3442e280c83b321a0464c86a9db1f982a4ff531550/pybase64-1.4.2-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:864d85a0470c615807ae8b97d724d068b940a2d10ac13a5f1b9e75a3ce441758", size = 60267, upload-time = "2025-07-27T13:03:18.132Z" }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46894929d71ccedebbfb0284173b0fea96bc029cd262654ba8451a7035d6/pybase64-1.4.2-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:47254d97ed2d8351e30ecfdb9e2414547f66ba73f8a09f932c9378ff75cd10c5", size = 54801, upload-time = "2025-07-27T13:03:19.669Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/02c95218ea964f0b2469717c2c69b48e63f4ca9f18af01a5b2a29e4c1216/pybase64-1.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:264b65ecc4f0ee73f3298ab83bbd8008f7f9578361b8df5b448f985d8c63e02a", size = 58599, upload-time = "2025-07-27T13:03:20.951Z" }, + { url = "https://files.pythonhosted.org/packages/15/45/ccc21004930789b8fb439d43e3212a6c260ccddb2bf450c39a20db093f33/pybase64-1.4.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:fbcc2b30cd740c16c9699f596f22c7a9e643591311ae72b1e776f2d539e9dd9d", size = 52388, upload-time = "2025-07-27T13:03:23.064Z" }, + { url = "https://files.pythonhosted.org/packages/c4/45/22e46e549710c4c237d77785b6fb1bc4c44c288a5c44237ba9daf5c34b82/pybase64-1.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cda9f79c22d51ee4508f5a43b673565f1d26af4330c99f114e37e3186fdd3607", size = 68802, upload-time = "2025-07-27T13:03:24.673Z" }, + { url = "https://files.pythonhosted.org/packages/55/0c/232c6261b81296e5593549b36e6e7884a5da008776d12665923446322c36/pybase64-1.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0c91c6d2a7232e2a1cd10b3b75a8bb657defacd4295a1e5e80455df2dfc84d4f", size = 57841, upload-time = "2025-07-27T13:03:25.948Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/8a/b35a615ae6f04550d696bb179c414538b3b477999435fdd4ad75b76139e4/pybase64-1.4.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a370dea7b1cee2a36a4d5445d4e09cc243816c5bc8def61f602db5a6f5438e52", size = 54320, upload-time = "2025-07-27T13:03:27.495Z" }, + { url = "https://files.pythonhosted.org/packages/d3/a9/8bd4f9bcc53689f1b457ecefed1eaa080e4949d65a62c31a38b7253d5226/pybase64-1.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9aa4de83f02e462a6f4e066811c71d6af31b52d7484de635582d0e3ec3d6cc3e", size = 56482, upload-time = "2025-07-27T13:03:28.942Z" }, + { url = "https://files.pythonhosted.org/packages/75/e5/4a7735b54a1191f61c3f5c2952212c85c2d6b06eb5fb3671c7603395f70c/pybase64-1.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83a1c2f9ed00fee8f064d548c8654a480741131f280e5750bb32475b7ec8ee38", size = 70959, upload-time = "2025-07-27T13:03:30.171Z" }, + { url = "https://files.pythonhosted.org/packages/d3/67/e2b6cb32c782e12304d467418e70da0212567f42bd4d3b5eb1fdf64920ad/pybase64-1.4.2-cp312-cp312-win32.whl", hash = "sha256:a6e5688b18d558e8c6b8701cc8560836c4bbeba61d33c836b4dba56b19423716", size = 33683, upload-time = "2025-07-27T13:03:31.775Z" }, + { url = "https://files.pythonhosted.org/packages/4f/bc/d5c277496063a09707486180f17abbdbdebbf2f5c4441b20b11d3cb7dc7c/pybase64-1.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:c995d21b8bd08aa179cd7dd4db0695c185486ecc72da1e8f6c37ec86cadb8182", size = 35817, upload-time = "2025-07-27T13:03:32.99Z" }, + { url = "https://files.pythonhosted.org/packages/e6/69/e4be18ae685acff0ae77f75d4586590f29d2cd187bf603290cf1d635cad4/pybase64-1.4.2-cp312-cp312-win_arm64.whl", hash = "sha256:e254b9258c40509c2ea063a7784f6994988f3f26099d6e08704e3c15dfed9a55", size = 30900, upload-time = "2025-07-27T13:03:34.499Z" }, + { url = "https://files.pythonhosted.org/packages/f4/56/5337f27a8b8d2d6693f46f7b36bae47895e5820bfa259b0072574a4e1057/pybase64-1.4.2-cp313-cp313-android_21_arm64_v8a.whl", 
hash = "sha256:0f331aa59549de21f690b6ccc79360ffed1155c3cfbc852eb5c097c0b8565a2b", size = 33888, upload-time = "2025-07-27T13:03:35.698Z" }, + { url = "https://files.pythonhosted.org/packages/4c/09/f3f4b11fc9beda7e8625e29fb0f549958fcbb34fea3914e1c1d95116e344/pybase64-1.4.2-cp313-cp313-android_21_x86_64.whl", hash = "sha256:9dad20bf1f3ed9e6fe566c4c9d07d9a6c04f5a280daebd2082ffb8620b0a880d", size = 40796, upload-time = "2025-07-27T13:03:36.927Z" }, + { url = "https://files.pythonhosted.org/packages/e3/ff/470768f0fe6de0aa302a8cb1bdf2f9f5cffc3f69e60466153be68bc953aa/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:69d3f0445b0faeef7bb7f93bf8c18d850785e2a77f12835f49e524cc54af04e7", size = 30914, upload-time = "2025-07-27T13:03:38.475Z" }, + { url = "https://files.pythonhosted.org/packages/75/6b/d328736662665e0892409dc410353ebef175b1be5eb6bab1dad579efa6df/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2372b257b1f4dd512f317fb27e77d313afd137334de64c87de8374027aacd88a", size = 31380, upload-time = "2025-07-27T13:03:39.7Z" }, + { url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" }, + { url = "https://files.pythonhosted.org/packages/4d/58/a3307b048d799ff596a3c7c574fcba66f9b6b8c899a3c00a698124ca7ad5/pybase64-1.4.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d5c532b03fd14a5040d6cf6571299a05616f925369c72ddf6fe2fb643eb36fed", size = 38319, upload-time = "2025-07-27T13:03:42.847Z" }, + { url = "https://files.pythonhosted.org/packages/08/a7/0bda06341b0a2c830d348c6e1c4d348caaae86c53dc9a046e943467a05e9/pybase64-1.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0f699514dc1d5689ca9cf378139e0214051922732f9adec9404bc680a8bef7c0", size = 31655, upload-time = 
"2025-07-27T13:03:44.426Z" }, + { url = "https://files.pythonhosted.org/packages/87/df/e1d6e8479e0c5113c2c63c7b44886935ce839c2d99884c7304ca9e86547c/pybase64-1.4.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:cd3e8713cbd32c8c6aa935feaf15c7670e2b7e8bfe51c24dc556811ebd293a29", size = 68232, upload-time = "2025-07-27T13:03:45.729Z" }, + { url = "https://files.pythonhosted.org/packages/71/ab/db4dbdfccb9ca874d6ce34a0784761471885d96730de85cee3d300381529/pybase64-1.4.2-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d377d48acf53abf4b926c2a7a24a19deb092f366a04ffd856bf4b3aa330b025d", size = 71608, upload-time = "2025-07-27T13:03:47.01Z" }, + { url = "https://files.pythonhosted.org/packages/11/e9/508df958563951045d728bbfbd3be77465f9231cf805cb7ccaf6951fc9f1/pybase64-1.4.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d83c076e78d619b9e1dd674e2bf5fb9001aeb3e0b494b80a6c8f6d4120e38cd9", size = 59912, upload-time = "2025-07-27T13:03:48.277Z" }, + { url = "https://files.pythonhosted.org/packages/f2/58/7f2cef1ceccc682088958448d56727369de83fa6b29148478f4d2acd107a/pybase64-1.4.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:ab9cdb6a8176a5cb967f53e6ad60e40c83caaa1ae31c5e1b29e5c8f507f17538", size = 56413, upload-time = "2025-07-27T13:03:49.908Z" }, + { url = "https://files.pythonhosted.org/packages/08/7c/7e0af5c5728fa7e2eb082d88eca7c6bd17429be819d58518e74919d42e66/pybase64-1.4.2-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:adf0c103ad559dbfb9fe69edfd26a15c65d9c991a5ab0a25b04770f9eb0b9484", size = 59311, upload-time = "2025-07-27T13:03:51.238Z" }, + { url = "https://files.pythonhosted.org/packages/03/8b/09825d0f37e45b9a3f546e5f990b6cf2dd838e54ea74122c2464646e0c77/pybase64-1.4.2-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = 
"sha256:0d03ef2f253d97ce0685d3624bf5e552d716b86cacb8a6c971333ba4b827e1fc", size = 60282, upload-time = "2025-07-27T13:03:52.56Z" }, + { url = "https://files.pythonhosted.org/packages/9c/3f/3711d2413f969bfd5b9cc19bc6b24abae361b7673ff37bcb90c43e199316/pybase64-1.4.2-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:e565abf906efee76ae4be1aef5df4aed0fda1639bc0d7732a3dafef76cb6fc35", size = 54845, upload-time = "2025-07-27T13:03:54.167Z" }, + { url = "https://files.pythonhosted.org/packages/c6/3c/4c7ce1ae4d828c2bb56d144322f81bffbaaac8597d35407c3d7cbb0ff98f/pybase64-1.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3c6a5f15fd03f232fc6f295cce3684f7bb08da6c6d5b12cc771f81c9f125cc6", size = 58615, upload-time = "2025-07-27T13:03:55.494Z" }, + { url = "https://files.pythonhosted.org/packages/f5/8f/c2fc03bf4ed038358620065c75968a30184d5d3512d09d3ef9cc3bd48592/pybase64-1.4.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:bad9e3db16f448728138737bbd1af9dc2398efd593a8bdd73748cc02cd33f9c6", size = 52434, upload-time = "2025-07-27T13:03:56.808Z" }, + { url = "https://files.pythonhosted.org/packages/e2/0a/757d6df0a60327c893cfae903e15419914dd792092dc8cc5c9523d40bc9b/pybase64-1.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2683ef271328365c31afee0ed8fa29356fb8fb7c10606794656aa9ffb95e92be", size = 68824, upload-time = "2025-07-27T13:03:58.735Z" }, + { url = "https://files.pythonhosted.org/packages/a0/14/84abe2ed8c29014239be1cfab45dfebe5a5ca779b177b8b6f779bd8b69da/pybase64-1.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:265b20089cd470079114c09bb74b101b3bfc3c94ad6b4231706cf9eff877d570", size = 57898, upload-time = "2025-07-27T13:04:00.379Z" }, + { url = "https://files.pythonhosted.org/packages/7e/c6/d193031f90c864f7b59fa6d1d1b5af41f0f5db35439988a8b9f2d1b32a13/pybase64-1.4.2-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:e53173badead10ef8b839aa5506eecf0067c7b75ad16d9bf39bc7144631f8e67", size = 54319, upload-time = 
"2025-07-27T13:04:01.742Z" }, + { url = "https://files.pythonhosted.org/packages/cb/37/ec0c7a610ff8f994ee6e0c5d5d66b6b6310388b96ebb347b03ae39870fdf/pybase64-1.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:5823b8dcf74da7da0f761ed60c961e8928a6524e520411ad05fe7f9f47d55b40", size = 56472, upload-time = "2025-07-27T13:04:03.089Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5a/e585b74f85cedd261d271e4c2ef333c5cfce7e80750771808f56fee66b98/pybase64-1.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1237f66c54357d325390da60aa5e21c6918fbcd1bf527acb9c1f4188c62cb7d5", size = 70966, upload-time = "2025-07-27T13:04:04.361Z" }, + { url = "https://files.pythonhosted.org/packages/ad/20/1b2fdd98b4ba36008419668c813025758214c543e362c66c49214ecd1127/pybase64-1.4.2-cp313-cp313-win32.whl", hash = "sha256:b0b851eb4f801d16040047f6889cca5e9dfa102b3e33f68934d12511245cef86", size = 33681, upload-time = "2025-07-27T13:04:06.126Z" }, + { url = "https://files.pythonhosted.org/packages/ff/64/3df4067d169c047054889f34b5a946cbe3785bca43404b93c962a5461a41/pybase64-1.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:19541c6e26d17d9522c02680fe242206ae05df659c82a657aabadf209cd4c6c7", size = 35822, upload-time = "2025-07-27T13:04:07.752Z" }, + { url = "https://files.pythonhosted.org/packages/d1/fd/db505188adf812e60ee923f196f9deddd8a1895b2b29b37f5db94afc3b1c/pybase64-1.4.2-cp313-cp313-win_arm64.whl", hash = "sha256:77a191863d576c0a5dd81f8a568a5ca15597cc980ae809dce62c717c8d42d8aa", size = 30899, upload-time = "2025-07-27T13:04:09.062Z" }, + { url = "https://files.pythonhosted.org/packages/d9/27/5f5fecd206ec1e06e1608a380af18dcb76a6ab08ade6597a3251502dcdb2/pybase64-1.4.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:2e194bbabe3fdf9e47ba9f3e157394efe0849eb226df76432126239b3f44992c", size = 38677, upload-time = "2025-07-27T13:04:10.334Z" }, + { url = 
"https://files.pythonhosted.org/packages/bf/0f/abe4b5a28529ef5f74e8348fa6a9ef27d7d75fbd98103d7664cf485b7d8f/pybase64-1.4.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:39aef1dadf4a004f11dd09e703abaf6528a87c8dbd39c448bb8aebdc0a08c1be", size = 32066, upload-time = "2025-07-27T13:04:11.641Z" }, + { url = "https://files.pythonhosted.org/packages/ac/7e/ea0ce6a7155cada5526017ec588b6d6185adea4bf9331565272f4ef583c2/pybase64-1.4.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:91cb920c7143e36ec8217031282c8651da3b2206d70343f068fac0e7f073b7f9", size = 72300, upload-time = "2025-07-27T13:04:12.969Z" }, + { url = "https://files.pythonhosted.org/packages/45/2d/e64c7a056c9ec48dfe130d1295e47a8c2b19c3984488fc08e5eaa1e86c88/pybase64-1.4.2-cp313-cp313t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6958631143fb9e71f9842000da042ec2f6686506b6706e2dfda29e97925f6aa0", size = 75520, upload-time = "2025-07-27T13:04:14.374Z" }, + { url = "https://files.pythonhosted.org/packages/43/e0/e5f93b2e1cb0751a22713c4baa6c6eaf5f307385e369180486c8316ed21e/pybase64-1.4.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:dc35f14141ef3f1ac70d963950a278a2593af66fe5a1c7a208e185ca6278fa25", size = 65384, upload-time = "2025-07-27T13:04:16.204Z" }, + { url = "https://files.pythonhosted.org/packages/ff/23/8c645a1113ad88a1c6a3d0e825e93ef8b74ad3175148767853a0a4d7626e/pybase64-1.4.2-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:5d949d2d677859c3a8507e1b21432a039d2b995e0bd3fe307052b6ded80f207a", size = 60471, upload-time = "2025-07-27T13:04:17.947Z" }, + { url = "https://files.pythonhosted.org/packages/8b/81/edd0f7d8b0526b91730a0dd4ce6b4c8be2136cd69d424afe36235d2d2a06/pybase64-1.4.2-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:09caacdd3e15fe7253a67781edd10a6a918befab0052a2a3c215fe5d1f150269", size = 
63945, upload-time = "2025-07-27T13:04:19.383Z" }, + { url = "https://files.pythonhosted.org/packages/a5/a5/edc224cd821fd65100b7af7c7e16b8f699916f8c0226c9c97bbae5a75e71/pybase64-1.4.2-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:e44b0e793b23f28ea0f15a9754bd0c960102a2ac4bccb8fafdedbd4cc4d235c0", size = 64858, upload-time = "2025-07-27T13:04:20.807Z" }, + { url = "https://files.pythonhosted.org/packages/11/3b/92853f968f1af7e42b7e54d21bdd319097b367e7dffa2ca20787361df74c/pybase64-1.4.2-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:849f274d0bcb90fc6f642c39274082724d108e41b15f3a17864282bd41fc71d5", size = 58557, upload-time = "2025-07-27T13:04:22.229Z" }, + { url = "https://files.pythonhosted.org/packages/76/09/0ec6bd2b2303b0ea5c6da7535edc9a608092075ef8c0cdd96e3e726cd687/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:528dba7ef1357bd7ce1aea143084501f47f5dd0fff7937d3906a68565aa59cfe", size = 63624, upload-time = "2025-07-27T13:04:23.952Z" }, + { url = "https://files.pythonhosted.org/packages/73/6e/52cb1ced2a517a3118b2e739e9417432049013ac7afa15d790103059e8e4/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:1da54be743d9a68671700cfe56c3ab8c26e8f2f5cc34eface905c55bc3a9af94", size = 56174, upload-time = "2025-07-27T13:04:25.419Z" }, + { url = "https://files.pythonhosted.org/packages/5b/9d/820fe79347467e48af985fe46180e1dd28e698ade7317bebd66de8a143f5/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:9b07c0406c3eaa7014499b0aacafb21a6d1146cfaa85d56f0aa02e6d542ee8f3", size = 72640, upload-time = "2025-07-27T13:04:26.824Z" }, + { url = "https://files.pythonhosted.org/packages/53/58/e863e10d08361e694935c815b73faad7e1ab03f99ae154d86c4e2f331896/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:312f2aa4cf5d199a97fbcaee75d2e59ebbaafcd091993eb373b43683498cdacb", size = 62453, upload-time = "2025-07-27T13:04:28.562Z" }, + { url = 
"https://files.pythonhosted.org/packages/95/f0/c392c4ac8ccb7a34b28377c21faa2395313e3c676d76c382642e19a20703/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad59362fc267bf15498a318c9e076686e4beeb0dfe09b457fabbc2b32468b97a", size = 58103, upload-time = "2025-07-27T13:04:29.996Z" }, + { url = "https://files.pythonhosted.org/packages/32/30/00ab21316e7df8f526aa3e3dc06f74de6711d51c65b020575d0105a025b2/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:01593bd064e7dcd6c86d04e94e44acfe364049500c20ac68ca1e708fbb2ca970", size = 60779, upload-time = "2025-07-27T13:04:31.549Z" }, + { url = "https://files.pythonhosted.org/packages/a6/65/114ca81839b1805ce4a2b7d58bc16e95634734a2059991f6382fc71caf3e/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5b81547ad8ea271c79fdf10da89a1e9313cb15edcba2a17adf8871735e9c02a0", size = 74684, upload-time = "2025-07-27T13:04:32.976Z" }, + { url = "https://files.pythonhosted.org/packages/54/8f/aa9d445b9bb693b8f6bb1456bd6d8576d79b7a63bf6c69af3a539235b15f/pybase64-1.4.2-cp313-cp313t-win32.whl", hash = "sha256:7edbe70b5654545a37e6e6b02de738303b1bbdfcde67f6cfec374cfb5cc4099e", size = 33961, upload-time = "2025-07-27T13:04:34.806Z" }, + { url = "https://files.pythonhosted.org/packages/0e/e5/da37cfb173c646fd4fc7c6aae2bc41d40de2ee49529854af8f4e6f498b45/pybase64-1.4.2-cp313-cp313t-win_amd64.whl", hash = "sha256:385690addf87c25d6366fab5d8ff512eed8a7ecb18da9e8152af1c789162f208", size = 36199, upload-time = "2025-07-27T13:04:36.223Z" }, + { url = "https://files.pythonhosted.org/packages/66/3e/1eb68fb7d00f2cec8bd9838e2a30d183d6724ae06e745fd6e65216f170ff/pybase64-1.4.2-cp313-cp313t-win_arm64.whl", hash = "sha256:c2070d0aa88580f57fe15ca88b09f162e604d19282915a95a3795b5d3c1c05b5", size = 31221, upload-time = "2025-07-27T13:04:37.704Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/bf/00a87d951473ce96c8c08af22b6983e681bfabdb78dd2dcf7ee58eac0932/pybase64-1.4.2-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:4157ad277a32cf4f02a975dffc62a3c67d73dfa4609b2c1978ef47e722b18b8e", size = 30924, upload-time = "2025-07-27T13:04:39.189Z" }, + { url = "https://files.pythonhosted.org/packages/ae/43/dee58c9d60e60e6fb32dc6da722d84592e22f13c277297eb4ce6baf99a99/pybase64-1.4.2-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e113267dc349cf624eb4f4fbf53fd77835e1aa048ac6877399af426aab435757", size = 31390, upload-time = "2025-07-27T13:04:40.995Z" }, + { url = "https://files.pythonhosted.org/packages/e1/11/b28906fc2e330b8b1ab4bc845a7bef808b8506734e90ed79c6062b095112/pybase64-1.4.2-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:cea5aaf218fd9c5c23afacfe86fd4464dfedc1a0316dd3b5b4075b068cc67df0", size = 38212, upload-time = "2025-07-27T13:04:42.729Z" }, + { url = "https://files.pythonhosted.org/packages/24/9e/868d1e104413d14b19feaf934fc7fad4ef5b18946385f8bb79684af40f24/pybase64-1.4.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:41213497abbd770435c7a9c8123fb02b93709ac4cf60155cd5aefc5f3042b600", size = 38303, upload-time = "2025-07-27T13:04:44.095Z" }, + { url = "https://files.pythonhosted.org/packages/a3/73/f7eac96ca505df0600280d6bfc671a9e2e2f947c2b04b12a70e36412f7eb/pybase64-1.4.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c8b522df7ee00f2ac1993ccd5e1f6608ae7482de3907668c2ff96a83ef213925", size = 31669, upload-time = "2025-07-27T13:04:45.845Z" }, + { url = "https://files.pythonhosted.org/packages/c6/43/8e18bea4fd455100112d6a73a83702843f067ef9b9272485b6bdfd9ed2f0/pybase64-1.4.2-cp314-cp314-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:06725022e540c5b098b978a0418ca979773e2cbdbb76f10bd97536f2ad1c5b49", size = 68452, upload-time = "2025-07-27T13:04:47.788Z" }, + { url = 
"https://files.pythonhosted.org/packages/e4/2e/851eb51284b97354ee5dfa1309624ab90920696e91a33cd85b13d20cc5c1/pybase64-1.4.2-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a3e54dcf0d0305ec88473c9d0009f698cabf86f88a8a10090efeff2879c421bb", size = 71674, upload-time = "2025-07-27T13:04:49.294Z" }, + { url = "https://files.pythonhosted.org/packages/57/0d/5cf1e5dc64aec8db43e8dee4e4046856d639a72bcb0fb3e716be42ced5f1/pybase64-1.4.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:67675cee727a60dc91173d2790206f01aa3c7b3fbccfa84fd5c1e3d883fe6caa", size = 60027, upload-time = "2025-07-27T13:04:50.769Z" }, + { url = "https://files.pythonhosted.org/packages/a4/8e/3479266bc0e65f6cc48b3938d4a83bff045330649869d950a378f2ddece0/pybase64-1.4.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:753da25d4fd20be7bda2746f545935773beea12d5cb5ec56ec2d2960796477b1", size = 56461, upload-time = "2025-07-27T13:04:52.37Z" }, + { url = "https://files.pythonhosted.org/packages/20/b6/f2b6cf59106dd78bae8717302be5b814cec33293504ad409a2eb752ad60c/pybase64-1.4.2-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:a78c768ce4ca550885246d14babdb8923e0f4a848dfaaeb63c38fc99e7ea4052", size = 59446, upload-time = "2025-07-27T13:04:53.967Z" }, + { url = "https://files.pythonhosted.org/packages/16/70/3417797dfccdfdd0a54e4ad17c15b0624f0fc2d6a362210f229f5c4e8fd0/pybase64-1.4.2-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:51b17f36d890c92f0618fb1c8db2ccc25e6ed07afa505bab616396fc9b0b0492", size = 60350, upload-time = "2025-07-27T13:04:55.881Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c6/6e4269dd98d150ae95d321b311a345eae0f7fd459d97901b4a586d7513bb/pybase64-1.4.2-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:f92218d667049ab4f65d54fa043a88ffdb2f07fff1f868789ef705a5221de7ec", size = 54989, upload-time = 
"2025-07-27T13:04:57.436Z" }, + { url = "https://files.pythonhosted.org/packages/f9/e8/18c1b0c255f964fafd0412b0d5a163aad588aeccb8f84b9bf9c8611d80f6/pybase64-1.4.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:3547b3d1499919a06491b3f879a19fbe206af2bd1a424ecbb4e601eb2bd11fea", size = 58724, upload-time = "2025-07-27T13:04:59.406Z" }, + { url = "https://files.pythonhosted.org/packages/b1/ad/ddfbd2125fc20b94865fb232b2e9105376fa16eee492e4b7786d42a86cbf/pybase64-1.4.2-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:958af7b0e09ddeb13e8c2330767c47b556b1ade19c35370f6451d139cde9f2a9", size = 52285, upload-time = "2025-07-27T13:05:01.198Z" }, + { url = "https://files.pythonhosted.org/packages/b6/4c/b9d4ec9224add33c84b925a03d1a53cd4106efb449ea8e0ae7795fed7bf7/pybase64-1.4.2-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:4facc57f6671e2229a385a97a618273e7be36a9ea0a9d1c1b9347f14d19ceba8", size = 69036, upload-time = "2025-07-27T13:05:03.109Z" }, + { url = "https://files.pythonhosted.org/packages/92/38/7b96794da77bed3d9b4fea40f14ae563648fba83a696e7602fabe60c0eb7/pybase64-1.4.2-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a32fc57d05d73a7c9b0ca95e9e265e21cf734195dc6873829a890058c35f5cfd", size = 57938, upload-time = "2025-07-27T13:05:04.744Z" }, + { url = "https://files.pythonhosted.org/packages/eb/c5/ae8bbce3c322d1b074e79f51f5df95961fe90cb8748df66c6bc97616e974/pybase64-1.4.2-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:3dc853243c81ce89cc7318e6946f860df28ddb7cd2a0648b981652d9ad09ee5a", size = 54474, upload-time = "2025-07-27T13:05:06.662Z" }, + { url = "https://files.pythonhosted.org/packages/15/9a/c09887c4bb1b43c03fc352e2671ef20c6686c6942a99106a45270ee5b840/pybase64-1.4.2-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:0e6d863a86b3e7bc6ac9bd659bebda4501b9da842521111b0b0e54eb51295df5", size = 56533, upload-time = "2025-07-27T13:05:08.368Z" }, + { url = 
"https://files.pythonhosted.org/packages/4f/0f/d5114d63d35d085639606a880cb06e2322841cd4b213adfc14d545c1186f/pybase64-1.4.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6579475140ff2067903725d8aca47f5747bcb211597a1edd60b58f6d90ada2bd", size = 71030, upload-time = "2025-07-27T13:05:10.3Z" }, + { url = "https://files.pythonhosted.org/packages/40/0e/fe6f1ed22ea52eb99f490a8441815ba21de288f4351aeef4968d71d20d2d/pybase64-1.4.2-cp314-cp314-win32.whl", hash = "sha256:373897f728d7b4f241a1f803ac732c27b6945d26d86b2741ad9b75c802e4e378", size = 34174, upload-time = "2025-07-27T13:05:12.254Z" }, + { url = "https://files.pythonhosted.org/packages/71/46/0e15bea52ffc63e8ae7935e945accbaf635e0aefa26d3e31fdf9bc9dcd01/pybase64-1.4.2-cp314-cp314-win_amd64.whl", hash = "sha256:1afe3361344617d298c1d08bc657ef56d0f702d6b72cb65d968b2771017935aa", size = 36308, upload-time = "2025-07-27T13:05:13.898Z" }, + { url = "https://files.pythonhosted.org/packages/4f/dc/55849fee2577bda77c1e078da04cc9237e8e474a8c8308deb702a26f2511/pybase64-1.4.2-cp314-cp314-win_arm64.whl", hash = "sha256:f131c9360babe522f3d90f34da3f827cba80318125cf18d66f2ee27e3730e8c4", size = 31341, upload-time = "2025-07-27T13:05:15.553Z" }, + { url = "https://files.pythonhosted.org/packages/39/44/c69d088e28b25e70ac742b6789cde038473815b2a69345c4bae82d5e244d/pybase64-1.4.2-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2583ac304131c1bd6e3120b0179333610f18816000db77c0a2dd6da1364722a8", size = 38678, upload-time = "2025-07-27T13:05:17.544Z" }, + { url = "https://files.pythonhosted.org/packages/00/93/2860ec067497b9cbb06242f96d44caebbd9eed32174e4eb8c1ffef760f94/pybase64-1.4.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:75a8116be4ea4cdd30a5c4f1a6f3b038e0d457eb03c8a2685d8ce2aa00ef8f92", size = 32066, upload-time = "2025-07-27T13:05:19.18Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/55/1e96249a38759332e8a01b31c370d88c60ceaf44692eb6ba4f0f451ee496/pybase64-1.4.2-cp314-cp314t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:217ea776a098d7c08668e5526b9764f5048bbfd28cac86834217ddfe76a4e3c4", size = 72465, upload-time = "2025-07-27T13:05:20.866Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ab/0f468605b899f3e35dbb7423fba3ff98aeed1ec16abb02428468494a58f4/pybase64-1.4.2-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:4ec14683e343c95b14248cdfdfa78c052582be7a3865fd570aa7cffa5ab5cf37", size = 75693, upload-time = "2025-07-27T13:05:22.896Z" }, + { url = "https://files.pythonhosted.org/packages/91/d1/9980a0159b699e2489baba05b71b7c953b29249118ba06fdbb3e9ea1b9b5/pybase64-1.4.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:480ecf21e1e956c5a10d3cf7b3b7e75bce3f9328cf08c101e4aab1925d879f34", size = 65577, upload-time = "2025-07-27T13:05:25Z" }, + { url = "https://files.pythonhosted.org/packages/16/86/b27e7b95f9863d245c0179a7245582eda3d262669d8f822777364d8fd7d5/pybase64-1.4.2-cp314-cp314t-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:1fe1ebdc55e9447142e2f6658944aadfb5a4fbf03dbd509be34182585515ecc1", size = 60662, upload-time = "2025-07-27T13:05:27.138Z" }, + { url = "https://files.pythonhosted.org/packages/28/87/a7f0dde0abc26bfbee761f1d3558eb4b139f33ddd9fe1f6825ffa7daa22d/pybase64-1.4.2-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c793a2b06753accdaf5e1a8bbe5d800aab2406919e5008174f989a1ca0081411", size = 64179, upload-time = "2025-07-27T13:05:28.996Z" }, + { url = "https://files.pythonhosted.org/packages/1e/88/5d6fa1c60e1363b4cac4c396978f39e9df4689e75225d7d9c0a5998e3a14/pybase64-1.4.2-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = 
"sha256:6acae6e1d1f7ebe40165f08076c7a73692b2bf9046fefe673f350536e007f556", size = 64968, upload-time = "2025-07-27T13:05:30.818Z" }, + { url = "https://files.pythonhosted.org/packages/20/6e/2ed585af5b2211040445d9849326dd2445320c9316268794f5453cfbaf30/pybase64-1.4.2-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:88b91cd0949358aadcea75f8de5afbcf3c8c5fb9ec82325bd24285b7119cf56e", size = 58738, upload-time = "2025-07-27T13:05:32.629Z" }, + { url = "https://files.pythonhosted.org/packages/ce/94/e2960b56322eabb3fbf303fc5a72e6444594c1b90035f3975c6fe666db5c/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:53316587e1b1f47a11a5ff068d3cbd4a3911c291f2aec14882734973684871b2", size = 63802, upload-time = "2025-07-27T13:05:34.687Z" }, + { url = "https://files.pythonhosted.org/packages/95/47/312139d764c223f534f751528ce3802887c279125eac64f71cd3b4e05abc/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:caa7f20f43d00602cf9043b5ba758d54f5c41707d3709b2a5fac17361579c53c", size = 56341, upload-time = "2025-07-27T13:05:36.554Z" }, + { url = "https://files.pythonhosted.org/packages/3f/d7/aec9a6ed53b128dac32f8768b646ca5730c88eef80934054d7fa7d02f3ef/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:2d93817e24fdd79c534ed97705df855af6f1d2535ceb8dfa80da9de75482a8d7", size = 72838, upload-time = "2025-07-27T13:05:38.459Z" }, + { url = "https://files.pythonhosted.org/packages/e3/a8/6ccc54c5f1f7c3450ad7c56da10c0f131d85ebe069ea6952b5b42f2e92d9/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:63cd769b51474d8d08f7f2ce73b30380d9b4078ec92ea6b348ea20ed1e1af88a", size = 62633, upload-time = "2025-07-27T13:05:40.624Z" }, + { url = "https://files.pythonhosted.org/packages/34/22/2b9d89f8ff6f2a01d6d6a88664b20a4817049cfc3f2c62caca040706660c/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:cd07e6a9993c392ec8eb03912a43c6a6b21b2deb79ee0d606700fe276e9a576f", size = 58282, upload-time = 
"2025-07-27T13:05:42.565Z" }, + { url = "https://files.pythonhosted.org/packages/b2/14/dbf6266177532a6a11804ac080ebffcee272f491b92820c39886ee20f201/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:6a8944e8194adff4668350504bc6b7dbde2dab9244c88d99c491657d145b5af5", size = 60948, upload-time = "2025-07-27T13:05:44.48Z" }, + { url = "https://files.pythonhosted.org/packages/fd/7a/b2ae9046a66dd5746cd72836a41386517b1680bea5ce02f2b4f1c9ebc688/pybase64-1.4.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:04ab398ec4b6a212af57f6a21a6336d5a1d754ff4ccb215951366ab9080481b2", size = 74854, upload-time = "2025-07-27T13:05:46.416Z" }, + { url = "https://files.pythonhosted.org/packages/ef/7e/9856f6d6c38a7b730e001123d2d9fa816b8b1a45f0cdee1d509d5947b047/pybase64-1.4.2-cp314-cp314t-win32.whl", hash = "sha256:3b9201ecdcb1c3e23be4caebd6393a4e6615bd0722528f5413b58e22e3792dd3", size = 34490, upload-time = "2025-07-27T13:05:48.304Z" }, + { url = "https://files.pythonhosted.org/packages/c7/38/8523a9dc1ec8704dedbe5ccc95192ae9a7585f7eec85cc62946fe3cacd32/pybase64-1.4.2-cp314-cp314t-win_amd64.whl", hash = "sha256:36e9b0cad8197136d73904ef5a71d843381d063fd528c5ab203fc4990264f682", size = 36680, upload-time = "2025-07-27T13:05:50.264Z" }, + { url = "https://files.pythonhosted.org/packages/3c/52/5600104ef7b85f89fb8ec54f73504ead3f6f0294027e08d281f3cafb5c1a/pybase64-1.4.2-cp314-cp314t-win_arm64.whl", hash = "sha256:f25140496b02db0e7401567cd869fb13b4c8118bf5c2428592ec339987146d8b", size = 31600, upload-time = "2025-07-27T13:05:52.24Z" }, +] + [[package]] name = "pybind11" -version = "2.13.6" +version = "3.0.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d2/c1/72b9622fcb32ff98b054f724e213c7f70d6898baa714f4516288456ceaba/pybind11-2.13.6.tar.gz", hash = "sha256:ba6af10348c12b24e92fa086b39cfba0eff619b61ac77c406167d813b096d39a", size = 218403, upload-time = "2024-09-14T00:35:22.606Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/ef/83/698d120e257a116f2472c710932023ad779409adf2734d2e940f34eea2c5/pybind11-3.0.0.tar.gz", hash = "sha256:c3f07bce3ada51c3e4b76badfa85df11688d12c46111f9d242bc5c9415af7862", size = 544819, upload-time = "2025-07-10T16:52:09.335Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/13/2f/0f24b288e2ce56f51c920137620b4434a38fd80583dbbe24fc2a1656c388/pybind11-2.13.6-py3-none-any.whl", hash = "sha256:237c41e29157b962835d356b370ededd57594a26d5894a795960f0047cb5caf5", size = 243282, upload-time = "2024-09-14T00:35:20.361Z" }, + { url = "https://files.pythonhosted.org/packages/41/9c/85f50a5476832c3efc67b6d7997808388236ae4754bf53e1749b3bc27577/pybind11-3.0.0-py3-none-any.whl", hash = "sha256:7c5cac504da5a701b5163f0e6a7ba736c713a096a5378383c5b4b064b753f607", size = 292118, upload-time = "2025-07-10T16:52:07.828Z" }, ] [[package]] @@ -3734,6 +4696,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/c0/ec2b1c8712ca690e5d61979dee872603e92b8a32f94cc1b72d53beab008a/pydantic-2.11.7-py3-none-any.whl", hash = "sha256:dde5df002701f6de26248661f6835bbe296a47bf73990135c7d07ce741b9623b", size = 444782, upload-time = "2025-06-14T08:33:14.905Z" }, ] +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + [[package]] name = "pydantic-core" version = "2.33.2" @@ -3776,6 +4743,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/9a/e73262f6c6656262b5fdd723ad90f518f579b7bc8622e43a942eec53c938/pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9", size = 1935777, upload-time = "2025-04-23T18:32:25.088Z" }, ] +[[package]] +name = "pydantic-extra-types" +version = "2.10.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/7e/ba/4178111ec4116c54e1dc7ecd2a1ff8f54256cdbd250e576882911e8f710a/pydantic_extra_types-2.10.5.tar.gz", hash = "sha256:1dcfa2c0cf741a422f088e0dbb4690e7bfadaaf050da3d6f80d6c3cf58a2bad8", size = 138429, upload-time = "2025-06-02T09:31:52.713Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/70/1a/5f4fd9e7285f10c44095a4f9fe17d0f358d1702a7c74a9278c794e8a7537/pydantic_extra_types-2.10.5-py3-none-any.whl", hash = "sha256:b60c4e23d573a69a4f1a16dd92888ecc0ef34fb0e655b4f305530377fa70e7a8", size = 38315, upload-time = "2025-06-02T09:31:51.229Z" }, +] + +[package.optional-dependencies] +pycountry = [ + { name = "pycountry" }, +] + [[package]] name = "pydata-sphinx-theme" version = "0.16.1" @@ -3794,6 +4779,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/0d/8ba33fa83a7dcde13eb3c1c2a0c1cc29950a048bfed6d9b0d8b6bd710b4c/pydata_sphinx_theme-0.16.1-py3-none-any.whl", hash = "sha256:225331e8ac4b32682c18fcac5a57a6f717c4e632cea5dd0e247b55155faeccde", size = 6723264, upload-time = "2024-12-17T10:53:35.645Z" }, ] +[[package]] +name = "pydub" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/9a/e6bca0eed82db26562c73b5076539a4a08d3cffd19c3cc5913a3e61145fd/pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f", size = 38326, upload-time = "2021-03-10T02:09:54.659Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a6/53/d78dc063216e62fc55f6b2eebb447f6a4b0a59f55c8406376f76bf959b08/pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6", size = 32327, upload-time = "2021-03-10T02:09:53.503Z" }, +] + +[[package]] +name = "pyecharts" +version = "2.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jinja2" }, + { name = "prettytable" }, + { name = "simplejson" }, +] +sdist = { url 
= "https://files.pythonhosted.org/packages/98/68/c86a3960ab5d97709237e91a5136ae02079c3c39c83192ca16f1a25f8b1a/pyecharts-2.0.8.tar.gz", hash = "sha256:908dbd939862dd3c76bb53697bdb41d3cdd0b5ba48ca69a76a6085d0aa27dbdf", size = 165148, upload-time = "2025-01-24T03:10:07.179Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ae/18/383622b338e4f6948ba1b75a8155d748ce097ead08a4163ca763f0ad510e/pyecharts-2.0.8-py3-none-any.whl", hash = "sha256:8b711ba139f39f89bc1b2a869d7adda89dc74c910d158a1f9063109fe66bc985", size = 153686, upload-time = "2025-01-24T03:10:03.737Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -3804,23 +4812,15 @@ wheels = [ ] [[package]] -name = "pynacl" -version = "1.5.0" +name = "pynvml" +version = "12.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi" }, + { name = "nvidia-ml-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854, upload-time = "2022-01-07T22:05:41.134Z" } +sdist = { url = "https://files.pythonhosted.org/packages/26/6f/6b5880ed0239e85b9a39aed103b65b2ef81425beef9f45e5c035bf008330/pynvml-12.0.0.tar.gz", hash = "sha256:299ce2451a6a17e6822d6faee750103e25b415f06f59abb8db65d30f794166f5", size = 33636, upload-time = "2024-12-02T15:04:36.631Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920, upload-time = "2022-01-07T22:05:49.156Z" }, - { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = 
"sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722, upload-time = "2022-01-07T22:05:50.989Z" }, - { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087, upload-time = "2022-01-07T22:05:52.539Z" }, - { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678, upload-time = "2022-01-07T22:05:54.251Z" }, - { url = "https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660, upload-time = "2022-01-07T22:05:56.056Z" }, - { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824, upload-time = "2022-01-07T22:05:57.434Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912, upload-time = "2022-01-07T22:05:58.665Z" }, - { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = 
"sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624, upload-time = "2022-01-07T22:06:00.085Z" }, - { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141, upload-time = "2022-01-07T22:06:01.861Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/f7cf07a65a96dd11d71f346f9c2863accdd4784da83af7181b067d556cbc/pynvml-12.0.0-py3-none-any.whl", hash = "sha256:fdff84b62a27dbe98e08e1a647eb77342bef1aebe0878bcd15e99a83fcbecb9e", size = 26560, upload-time = "2024-12-02T15:04:35.047Z" }, ] [[package]] @@ -3832,19 +4832,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, ] -[[package]] -name = "pyre-extensions" -version = "0.0.32" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, - { name = "typing-inspect" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a7/53/5bc2532536e921c48366ad1047c1344ccef6afa5e84053f0f6e20a453767/pyre_extensions-0.0.32.tar.gz", hash = "sha256:5396715f14ea56c4d5fd0a88c57ca7e44faa468f905909edd7de4ad90ed85e55", size = 10852, upload-time = "2024-11-22T19:26:44.152Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/7a/9812cb8be9828ab688203c5ac5f743c60652887f0c00995a6f6f19f912bd/pyre_extensions-0.0.32-py3-none-any.whl", hash = "sha256:a63ba6883ab02f4b1a9f372ed4eb4a2f4c6f3d74879aa2725186fdfcfe3e5c68", size = 12766, upload-time = "2024-11-22T19:26:42.465Z" }, -] - [[package]] name = "pyrefly" version = "0.24.2" @@ -3879,14 +4866,14 @@ wheels = [ [[package]] name = "pytest-asyncio" 
-version = "1.0.0" +version = "1.1.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960, upload-time = "2025-05-26T04:54:40.484Z" } +sdist = { url = "https://files.pythonhosted.org/packages/4e/51/f8794af39eeb870e87a8c8068642fc07bce0c854d6865d7dd0f2a9d338c2/pytest_asyncio-1.1.0.tar.gz", hash = "sha256:796aa822981e01b68c12e4827b8697108f7205020f24b5793b3c41555dab68ea", size = 46652, upload-time = "2025-07-16T04:29:26.393Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976, upload-time = "2025-05-26T04:54:39.035Z" }, + { url = "https://files.pythonhosted.org/packages/c7/9d/bf86eddabf8c6c9cb1ea9a869d6873b46f105a5d292d3a6f7071f5b07935/pytest_asyncio-1.1.0-py3-none-any.whl", hash = "sha256:5fe2d69607b0bd75c656d1211f969cadba035030156745ee09e7d71740e58ecf", size = 15157, upload-time = "2025-07-16T04:29:24.929Z" }, ] [[package]] @@ -3927,6 +4914,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/7f/92c8dbe185aa38270fec1e73e0ed70d8e5de31963aa057ba621055f8b008/pytest_random_order-1.2.0-py3-none-any.whl", hash = "sha256:78d1d6f346222cdf26a7302c502d2f1cab19454529af960b8b9e1427a99ab277", size = 10889, upload-time = "2025-06-22T14:44:42.438Z" }, ] +[[package]] +name = "pytest-testmon" +version = "2.1.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coverage" }, + { name = "pytest" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/54/24/b17712bc8b9d9814a30346e5bd76a6c4539f5187455f4e0d99d95f033da6/pytest_testmon-2.1.3.tar.gz", hash = "sha256:dad41aa7d501d74571750da1abd3f6673b63fd9dbf3023bd1623814999018c97", size = 22608, upload-time = "2024-12-22T12:43:28.822Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/08/278800711d937e76ce59105fea1bb739ae5ff5c13583fd064fe3b4e64fa1/pytest_testmon-2.1.3-py3-none-any.whl", hash = "sha256:53ba06d8a90ce24c3a191b196aac72ca4b788beff5eb1c1bffee04dc50ec7105", size = 24994, upload-time = "2024-12-22T12:43:10.173Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0" @@ -3978,25 +4978,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, ] -[[package]] -name = "pytorch-lightning" -version = "2.5.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fsspec", extra = ["http"] }, - { name = "lightning-utilities" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "torch" }, - { name = "torchmetrics" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/01/3e/728fbdc671d07727ad447f9401d98a43570573965beb3cb2060f9a330b4f/pytorch_lightning-2.5.2.tar.gz", hash = "sha256:f817087d611be8d43b777dd4e543d72703e235510936677a13e6c29f7fd790e3", size = 636859, upload-time = "2025-06-20T15:58:27.062Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e2/42/47c186c8f9e956e559c89e6c764d5d5d0d0af517c04ca0ad39bd0a357d3a/pytorch_lightning-2.5.2-py3-none-any.whl", hash = "sha256:17cfdf89bd98074e389101f097cdf34c486a1f5c6d3fdcefbaf4dea7f97ff0bf", size = 825366, upload-time = "2025-06-20T15:58:25.534Z" }, -] - [[package]] name = "pytz" version = 
"2025.2" @@ -4006,17 +4987,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225, upload-time = "2025-03-25T02:24:58.468Z" }, ] +[[package]] +name = "pyudorandom" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/13/14/6fc20ea903eda547d6a255e995f8d4a09fdc3cf8bfacb6f85e6d669bc259/pyudorandom-1.0.0.tar.gz", hash = "sha256:f30a093a0170c15f9c7f87eb29f71f0f5fde995528b7c6dc4606d389e8c37755", size = 1599, upload-time = "2016-07-18T16:18:56.037Z" } + [[package]] name = "pywin32" -version = "310" +version = "311" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/ec/4fdbe47932f671d6e348474ea35ed94227fb5df56a7c30cbbb42cd396ed0/pywin32-310-cp312-cp312-win32.whl", hash = "sha256:8a75a5cc3893e83a108c05d82198880704c44bbaee4d06e442e471d3c9ea4f3d", size = 8796239, upload-time = "2025-03-17T00:55:58.807Z" }, - { url = "https://files.pythonhosted.org/packages/e3/e5/b0627f8bb84e06991bea89ad8153a9e50ace40b2e1195d68e9dff6b03d0f/pywin32-310-cp312-cp312-win_amd64.whl", hash = "sha256:bf5c397c9a9a19a6f62f3fb821fbf36cac08f03770056711f765ec1503972060", size = 9503839, upload-time = "2025-03-17T00:56:00.8Z" }, - { url = "https://files.pythonhosted.org/packages/1f/32/9ccf53748df72301a89713936645a664ec001abd35ecc8578beda593d37d/pywin32-310-cp312-cp312-win_arm64.whl", hash = "sha256:2349cc906eae872d0663d4d6290d13b90621eaf78964bb1578632ff20e152966", size = 8459470, upload-time = "2025-03-17T00:56:02.601Z" }, - { url = "https://files.pythonhosted.org/packages/1c/09/9c1b978ffc4ae53999e89c19c77ba882d9fce476729f23ef55211ea1c034/pywin32-310-cp313-cp313-win32.whl", hash = "sha256:5d241a659c496ada3253cd01cfaa779b048e90ce4b2b38cd44168ad555ce74ab", size = 
8794384, upload-time = "2025-03-17T00:56:04.383Z" }, - { url = "https://files.pythonhosted.org/packages/45/3c/b4640f740ffebadd5d34df35fecba0e1cfef8fde9f3e594df91c28ad9b50/pywin32-310-cp313-cp313-win_amd64.whl", hash = "sha256:667827eb3a90208ddbdcc9e860c81bde63a135710e21e4cb3348968e4bd5249e", size = 9503039, upload-time = "2025-03-17T00:56:06.207Z" }, - { url = "https://files.pythonhosted.org/packages/b4/f4/f785020090fb050e7fb6d34b780f2231f302609dc964672f72bfaeb59a28/pywin32-310-cp313-cp313-win_arm64.whl", hash = "sha256:e308f831de771482b7cf692a1f308f8fca701b2d8f9dde6cc440c7da17e47b33", size = 8458152, upload-time = "2025-03-17T00:56:07.819Z" }, + { url = "https://files.pythonhosted.org/packages/e7/ab/01ea1943d4eba0f850c3c61e78e8dd59757ff815ff3ccd0a84de5f541f42/pywin32-311-cp312-cp312-win32.whl", hash = "sha256:750ec6e621af2b948540032557b10a2d43b0cee2ae9758c54154d711cc852d31", size = 8706543, upload-time = "2025-07-14T20:13:20.765Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a8/a0e8d07d4d051ec7502cd58b291ec98dcc0c3fff027caad0470b72cfcc2f/pywin32-311-cp312-cp312-win_amd64.whl", hash = "sha256:b8c095edad5c211ff31c05223658e71bf7116daa0ecf3ad85f3201ea3190d067", size = 9495040, upload-time = "2025-07-14T20:13:22.543Z" }, + { url = "https://files.pythonhosted.org/packages/ba/3a/2ae996277b4b50f17d61f0603efd8253cb2d79cc7ae159468007b586396d/pywin32-311-cp312-cp312-win_arm64.whl", hash = "sha256:e286f46a9a39c4a18b319c28f59b61de793654af2f395c102b4f819e584b5852", size = 8710102, upload-time = "2025-07-14T20:13:24.682Z" }, + { url = "https://files.pythonhosted.org/packages/a5/be/3fd5de0979fcb3994bfee0d65ed8ca9506a8a1260651b86174f6a86f52b3/pywin32-311-cp313-cp313-win32.whl", hash = "sha256:f95ba5a847cba10dd8c4d8fefa9f2a6cf283b8b88ed6178fa8a6c1ab16054d0d", size = 8705700, upload-time = "2025-07-14T20:13:26.471Z" }, + { url = 
"https://files.pythonhosted.org/packages/e3/28/e0a1909523c6890208295a29e05c2adb2126364e289826c0a8bc7297bd5c/pywin32-311-cp313-cp313-win_amd64.whl", hash = "sha256:718a38f7e5b058e76aee1c56ddd06908116d35147e133427e59a3983f703a20d", size = 9494700, upload-time = "2025-07-14T20:13:28.243Z" }, + { url = "https://files.pythonhosted.org/packages/04/bf/90339ac0f55726dce7d794e6d79a18a91265bdf3aa70b6b9ca52f35e022a/pywin32-311-cp313-cp313-win_arm64.whl", hash = "sha256:7b4075d959648406202d92a2310cb990fea19b535c7f4a78d3f5e10b926eeb8a", size = 8709318, upload-time = "2025-07-14T20:13:30.348Z" }, + { url = "https://files.pythonhosted.org/packages/c9/31/097f2e132c4f16d99a22bfb777e0fd88bd8e1c634304e102f313af69ace5/pywin32-311-cp314-cp314-win32.whl", hash = "sha256:b7a2c10b93f8986666d0c803ee19b5990885872a7de910fc460f9b0c2fbf92ee", size = 8840714, upload-time = "2025-07-14T20:13:32.449Z" }, + { url = "https://files.pythonhosted.org/packages/90/4b/07c77d8ba0e01349358082713400435347df8426208171ce297da32c313d/pywin32-311-cp314-cp314-win_amd64.whl", hash = "sha256:3aca44c046bd2ed8c90de9cb8427f581c479e594e99b5c0bb19b29c10fd6cb87", size = 9656800, upload-time = "2025-07-14T20:13:34.312Z" }, + { url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540, upload-time = "2025-07-14T20:13:36.379Z" }, ] [[package]] @@ -4045,63 +5035,72 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, ] -[[package]] -name = "pyyaml-ft" -version = "8.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/5e/eb/5a0d575de784f9a1f94e2b1288c6886f13f34185e13117ed530f32b6f8a8/pyyaml_ft-8.0.0.tar.gz", hash = "sha256:0c947dce03954c7b5d38869ed4878b2e6ff1d44b08a0d84dc83fdad205ae39ab", size = 141057, upload-time = "2025-06-10T15:32:15.613Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/ba/a067369fe61a2e57fb38732562927d5bae088c73cb9bb5438736a9555b29/pyyaml_ft-8.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c1306282bc958bfda31237f900eb52c9bedf9b93a11f82e1aab004c9a5657a6", size = 187027, upload-time = "2025-06-10T15:31:48.722Z" }, - { url = "https://files.pythonhosted.org/packages/ad/c5/a3d2020ce5ccfc6aede0d45bcb870298652ac0cf199f67714d250e0cdf39/pyyaml_ft-8.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30c5f1751625786c19de751e3130fc345ebcba6a86f6bddd6e1285342f4bbb69", size = 176146, upload-time = "2025-06-10T15:31:50.584Z" }, - { url = "https://files.pythonhosted.org/packages/e3/bb/23a9739291086ca0d3189eac7cd92b4d00e9fdc77d722ab610c35f9a82ba/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fa992481155ddda2e303fcc74c79c05eddcdbc907b888d3d9ce3ff3e2adcfb0", size = 746792, upload-time = "2025-06-10T15:31:52.304Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c2/e8825f4ff725b7e560d62a3609e31d735318068e1079539ebfde397ea03e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cec6c92b4207004b62dfad1f0be321c9f04725e0f271c16247d8b39c3bf3ea42", size = 786772, upload-time = "2025-06-10T15:31:54.712Z" }, - { url = "https://files.pythonhosted.org/packages/35/be/58a4dcae8854f2fdca9b28d9495298fd5571a50d8430b1c3033ec95d2d0e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06237267dbcab70d4c0e9436d8f719f04a51123f0ca2694c00dd4b68c338e40b", size = 778723, upload-time = "2025-06-10T15:31:56.093Z" }, - { url = 
"https://files.pythonhosted.org/packages/86/ed/fed0da92b5d5d7340a082e3802d84c6dc9d5fa142954404c41a544c1cb92/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8a7f332bc565817644cdb38ffe4739e44c3e18c55793f75dddb87630f03fc254", size = 758478, upload-time = "2025-06-10T15:31:58.314Z" }, - { url = "https://files.pythonhosted.org/packages/f0/69/ac02afe286275980ecb2dcdc0156617389b7e0c0a3fcdedf155c67be2b80/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d10175a746be65f6feb86224df5d6bc5c049ebf52b89a88cf1cd78af5a367a8", size = 799159, upload-time = "2025-06-10T15:31:59.675Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ac/c492a9da2e39abdff4c3094ec54acac9747743f36428281fb186a03fab76/pyyaml_ft-8.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:58e1015098cf8d8aec82f360789c16283b88ca670fe4275ef6c48c5e30b22a96", size = 158779, upload-time = "2025-06-10T15:32:01.029Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9b/41998df3298960d7c67653669f37710fa2d568a5fc933ea24a6df60acaf6/pyyaml_ft-8.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5f3e2ceb790d50602b2fd4ec37abbd760a8c778e46354df647e7c5a4ebb", size = 191331, upload-time = "2025-06-10T15:32:02.602Z" }, - { url = "https://files.pythonhosted.org/packages/0f/16/2710c252ee04cbd74d9562ebba709e5a284faeb8ada88fcda548c9191b47/pyyaml_ft-8.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8d445bf6ea16bb93c37b42fdacfb2f94c8e92a79ba9e12768c96ecde867046d1", size = 182879, upload-time = "2025-06-10T15:32:04.466Z" }, - { url = "https://files.pythonhosted.org/packages/9a/40/ae8163519d937fa7bfa457b6f78439cc6831a7c2b170e4f612f7eda71815/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c56bb46b4fda34cbb92a9446a841da3982cdde6ea13de3fbd80db7eeeab8b49", size = 811277, upload-time = "2025-06-10T15:32:06.214Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/66/28d82dbff7f87b96f0eeac79b7d972a96b4980c1e445eb6a857ba91eda00/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dab0abb46eb1780da486f022dce034b952c8ae40753627b27a626d803926483b", size = 831650, upload-time = "2025-06-10T15:32:08.076Z" }, - { url = "https://files.pythonhosted.org/packages/e8/df/161c4566facac7d75a9e182295c223060373d4116dead9cc53a265de60b9/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd48d639cab5ca50ad957b6dd632c7dd3ac02a1abe0e8196a3c24a52f5db3f7a", size = 815755, upload-time = "2025-06-10T15:32:09.435Z" }, - { url = "https://files.pythonhosted.org/packages/05/10/f42c48fa5153204f42eaa945e8d1fd7c10d6296841dcb2447bf7da1be5c4/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:052561b89d5b2a8e1289f326d060e794c21fa068aa11255fe71d65baf18a632e", size = 810403, upload-time = "2025-06-10T15:32:11.051Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d2/e369064aa51009eb9245399fd8ad2c562bd0bcd392a00be44b2a824ded7c/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3bb4b927929b0cb162fb1605392a321e3333e48ce616cdcfa04a839271373255", size = 835581, upload-time = "2025-06-10T15:32:12.897Z" }, - { url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, -] - [[package]] name = "pyzmq" -version = "27.0.0" +version = "27.0.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "implementation_name == 'pypy'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f1/06/50a4e9648b3e8b992bef8eb632e457307553a89d294103213cfd47b3da69/pyzmq-27.0.0.tar.gz", hash = 
"sha256:b1f08eeb9ce1510e6939b6e5dcd46a17765e2333daae78ecf4606808442e52cf", size = 280478, upload-time = "2025-06-13T14:09:07.087Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/93/a7/9ad68f55b8834ede477842214feba6a4c786d936c022a67625497aacf61d/pyzmq-27.0.0-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:cbabc59dcfaac66655c040dfcb8118f133fb5dde185e5fc152628354c1598e52", size = 1305438, upload-time = "2025-06-13T14:07:31.676Z" }, - { url = "https://files.pythonhosted.org/packages/ba/ee/26aa0f98665a22bc90ebe12dced1de5f3eaca05363b717f6fb229b3421b3/pyzmq-27.0.0-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:cb0ac5179cba4b2f94f1aa208fbb77b62c4c9bf24dd446278b8b602cf85fcda3", size = 895095, upload-time = "2025-06-13T14:07:33.104Z" }, - { url = "https://files.pythonhosted.org/packages/cf/85/c57e7ab216ecd8aa4cc7e3b83b06cc4e9cf45c87b0afc095f10cd5ce87c1/pyzmq-27.0.0-cp312-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53a48f0228eab6cbf69fde3aa3c03cbe04e50e623ef92ae395fce47ef8a76152", size = 651826, upload-time = "2025-06-13T14:07:34.831Z" }, - { url = "https://files.pythonhosted.org/packages/69/9a/9ea7e230feda9400fb0ae0d61d7d6ddda635e718d941c44eeab22a179d34/pyzmq-27.0.0-cp312-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:111db5f395e09f7e775f759d598f43cb815fc58e0147623c4816486e1a39dc22", size = 839750, upload-time = "2025-06-13T14:07:36.553Z" }, - { url = "https://files.pythonhosted.org/packages/08/66/4cebfbe71f3dfbd417011daca267539f62ed0fbc68105357b68bbb1a25b7/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c8878011653dcdc27cc2c57e04ff96f0471e797f5c19ac3d7813a245bcb24371", size = 1641357, upload-time = "2025-06-13T14:07:38.21Z" }, - { url = "https://files.pythonhosted.org/packages/ac/f6/b0f62578c08d2471c791287149cb8c2aaea414ae98c6e995c7dbe008adfb/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_i686.whl", hash = 
"sha256:c0ed2c1f335ba55b5fdc964622254917d6b782311c50e138863eda409fbb3b6d", size = 2020281, upload-time = "2025-06-13T14:07:39.599Z" }, - { url = "https://files.pythonhosted.org/packages/37/b9/4f670b15c7498495da9159edc374ec09c88a86d9cd5a47d892f69df23450/pyzmq-27.0.0-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:e918d70862d4cfd4b1c187310015646a14e1f5917922ab45b29f28f345eeb6be", size = 1877110, upload-time = "2025-06-13T14:07:41.027Z" }, - { url = "https://files.pythonhosted.org/packages/66/31/9dee25c226295b740609f0d46db2fe972b23b6f5cf786360980524a3ba92/pyzmq-27.0.0-cp312-abi3-win32.whl", hash = "sha256:88b4e43cab04c3c0f0d55df3b1eef62df2b629a1a369b5289a58f6fa8b07c4f4", size = 559297, upload-time = "2025-06-13T14:07:42.533Z" }, - { url = "https://files.pythonhosted.org/packages/9b/12/52da5509800f7ff2d287b2f2b4e636e7ea0f001181cba6964ff6c1537778/pyzmq-27.0.0-cp312-abi3-win_amd64.whl", hash = "sha256:dce4199bf5f648a902ce37e7b3afa286f305cd2ef7a8b6ec907470ccb6c8b371", size = 619203, upload-time = "2025-06-13T14:07:43.843Z" }, - { url = "https://files.pythonhosted.org/packages/93/6d/7f2e53b19d1edb1eb4f09ec7c3a1f945ca0aac272099eab757d15699202b/pyzmq-27.0.0-cp312-abi3-win_arm64.whl", hash = "sha256:56e46bbb85d52c1072b3f809cc1ce77251d560bc036d3a312b96db1afe76db2e", size = 551927, upload-time = "2025-06-13T14:07:45.51Z" }, - { url = "https://files.pythonhosted.org/packages/19/62/876b27c4ff777db4ceba1c69ea90d3c825bb4f8d5e7cd987ce5802e33c55/pyzmq-27.0.0-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:c36ad534c0c29b4afa088dc53543c525b23c0797e01b69fef59b1a9c0e38b688", size = 1340826, upload-time = "2025-06-13T14:07:46.881Z" }, - { url = "https://files.pythonhosted.org/packages/43/69/58ef8f4f59d3bcd505260c73bee87b008850f45edca40ddaba54273c35f4/pyzmq-27.0.0-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:67855c14173aec36395d7777aaba3cc527b393821f30143fd20b98e1ff31fd38", size = 897283, upload-time = "2025-06-13T14:07:49.562Z" }, - { url = 
"https://files.pythonhosted.org/packages/43/15/93a0d0396700a60475ad3c5d42c5f1c308d3570bc94626b86c71ef9953e0/pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8617c7d43cd8ccdb62aebe984bfed77ca8f036e6c3e46dd3dddda64b10f0ab7a", size = 660567, upload-time = "2025-06-13T14:07:51.364Z" }, - { url = "https://files.pythonhosted.org/packages/0e/b3/fe055513e498ca32f64509abae19b9c9eb4d7c829e02bd8997dd51b029eb/pyzmq-27.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67bfbcbd0a04c575e8103a6061d03e393d9f80ffdb9beb3189261e9e9bc5d5e9", size = 847681, upload-time = "2025-06-13T14:07:52.77Z" }, - { url = "https://files.pythonhosted.org/packages/b6/4f/ff15300b00b5b602191f3df06bbc8dd4164e805fdd65bb77ffbb9c5facdc/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:5cd11d46d7b7e5958121b3eaf4cd8638eff3a720ec527692132f05a57f14341d", size = 1650148, upload-time = "2025-06-13T14:07:54.178Z" }, - { url = "https://files.pythonhosted.org/packages/c4/6f/84bdfff2a224a6f26a24249a342e5906993c50b0761e311e81b39aef52a7/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:b801c2e40c5aa6072c2f4876de8dccd100af6d9918d4d0d7aa54a1d982fd4f44", size = 2023768, upload-time = "2025-06-13T14:07:55.714Z" }, - { url = "https://files.pythonhosted.org/packages/64/39/dc2db178c26a42228c5ac94a9cc595030458aa64c8d796a7727947afbf55/pyzmq-27.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:20d5cb29e8c5f76a127c75b6e7a77e846bc4b655c373baa098c26a61b7ecd0ef", size = 1885199, upload-time = "2025-06-13T14:07:57.166Z" }, - { url = "https://files.pythonhosted.org/packages/c7/21/dae7b06a1f8cdee5d8e7a63d99c5d129c401acc40410bef2cbf42025e26f/pyzmq-27.0.0-cp313-cp313t-win32.whl", hash = "sha256:a20528da85c7ac7a19b7384e8c3f8fa707841fd85afc4ed56eda59d93e3d98ad", size = 575439, upload-time = "2025-06-13T14:07:58.959Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/bc/1709dc55f0970cf4cb8259e435e6773f9946f41a045c2cb90e870b7072da/pyzmq-27.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d8229f2efece6a660ee211d74d91dbc2a76b95544d46c74c615e491900dc107f", size = 639933, upload-time = "2025-06-13T14:08:00.777Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/30/5f/557d2032a2f471edbcc227da724c24a1c05887b5cda1e3ae53af98b9e0a5/pyzmq-27.0.1.tar.gz", hash = "sha256:45c549204bc20e7484ffd2555f6cf02e572440ecf2f3bdd60d4404b20fddf64b", size = 281158, upload-time = "2025-08-03T05:05:40.352Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/9b/c0957041067c7724b310f22c398be46399297c12ed834c3bc42200a2756f/pyzmq-27.0.1-cp312-abi3-macosx_10_15_universal2.whl", hash = "sha256:af7ebce2a1e7caf30c0bb64a845f63a69e76a2fadbc1cac47178f7bb6e657bdd", size = 1305432, upload-time = "2025-08-03T05:03:32.177Z" }, + { url = "https://files.pythonhosted.org/packages/8e/55/bd3a312790858f16b7def3897a0c3eb1804e974711bf7b9dcb5f47e7f82c/pyzmq-27.0.1-cp312-abi3-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:8f617f60a8b609a13099b313e7e525e67f84ef4524b6acad396d9ff153f6e4cd", size = 895095, upload-time = "2025-08-03T05:03:33.918Z" }, + { url = "https://files.pythonhosted.org/packages/20/50/fc384631d8282809fb1029a4460d2fe90fa0370a0e866a8318ed75c8d3bb/pyzmq-27.0.1-cp312-abi3-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d59dad4173dc2a111f03e59315c7bd6e73da1a9d20a84a25cf08325b0582b1a", size = 651826, upload-time = "2025-08-03T05:03:35.818Z" }, + { url = "https://files.pythonhosted.org/packages/7e/0a/2356305c423a975000867de56888b79e44ec2192c690ff93c3109fd78081/pyzmq-27.0.1-cp312-abi3-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f5b6133c8d313bde8bd0d123c169d22525300ff164c2189f849de495e1344577", size = 839751, upload-time = "2025-08-03T05:03:37.265Z" }, + { url = 
"https://files.pythonhosted.org/packages/d7/1b/81e95ad256ca7e7ccd47f5294c1c6da6e2b64fbace65b84fe8a41470342e/pyzmq-27.0.1-cp312-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:58cca552567423f04d06a075f4b473e78ab5bdb906febe56bf4797633f54aa4e", size = 1641359, upload-time = "2025-08-03T05:03:38.799Z" }, + { url = "https://files.pythonhosted.org/packages/50/63/9f50ec965285f4e92c265c8f18344e46b12803666d8b73b65d254d441435/pyzmq-27.0.1-cp312-abi3-musllinux_1_2_i686.whl", hash = "sha256:4b9d8e26fb600d0d69cc9933e20af08552e97cc868a183d38a5c0d661e40dfbb", size = 2020281, upload-time = "2025-08-03T05:03:40.338Z" }, + { url = "https://files.pythonhosted.org/packages/02/4a/19e3398d0dc66ad2b463e4afa1fc541d697d7bc090305f9dfb948d3dfa29/pyzmq-27.0.1-cp312-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2329f0c87f0466dce45bba32b63f47018dda5ca40a0085cc5c8558fea7d9fc55", size = 1877112, upload-time = "2025-08-03T05:03:42.012Z" }, + { url = "https://files.pythonhosted.org/packages/bf/42/c562e9151aa90ed1d70aac381ea22a929d6b3a2ce4e1d6e2e135d34fd9c6/pyzmq-27.0.1-cp312-abi3-win32.whl", hash = "sha256:57bb92abdb48467b89c2d21da1ab01a07d0745e536d62afd2e30d5acbd0092eb", size = 558177, upload-time = "2025-08-03T05:03:43.979Z" }, + { url = "https://files.pythonhosted.org/packages/40/96/5c50a7d2d2b05b19994bf7336b97db254299353dd9b49b565bb71b485f03/pyzmq-27.0.1-cp312-abi3-win_amd64.whl", hash = "sha256:ff3f8757570e45da7a5bedaa140489846510014f7a9d5ee9301c61f3f1b8a686", size = 618923, upload-time = "2025-08-03T05:03:45.438Z" }, + { url = "https://files.pythonhosted.org/packages/13/33/1ec89c8f21c89d21a2eaff7def3676e21d8248d2675705e72554fb5a6f3f/pyzmq-27.0.1-cp312-abi3-win_arm64.whl", hash = "sha256:df2c55c958d3766bdb3e9d858b911288acec09a9aab15883f384fc7180df5bed", size = 552358, upload-time = "2025-08-03T05:03:46.887Z" }, + { url = "https://files.pythonhosted.org/packages/6c/a0/f26e276211ec8090a4d11e4ec70eb8a8b15781e591c1d44ce62f372963a0/pyzmq-27.0.1-cp313-cp313-android_24_arm64_v8a.whl", hash = 
"sha256:497bd8af534ae55dc4ef67eebd1c149ff2a0b0f1e146db73c8b5a53d83c1a5f5", size = 1122287, upload-time = "2025-08-03T05:03:48.838Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d8/af4b507e4f7eeea478cc8ee873995a6fd55582bfb99140593ed460e1db3c/pyzmq-27.0.1-cp313-cp313-android_24_x86_64.whl", hash = "sha256:a066ea6ad6218b4c233906adf0ae67830f451ed238419c0db609310dd781fbe7", size = 1155756, upload-time = "2025-08-03T05:03:50.907Z" }, + { url = "https://files.pythonhosted.org/packages/ac/55/37fae0013e11f88681da42698e550b08a316d608242551f65095cc99232a/pyzmq-27.0.1-cp313-cp313t-macosx_10_15_universal2.whl", hash = "sha256:72d235d6365ca73d8ce92f7425065d70f5c1e19baa458eb3f0d570e425b73a96", size = 1340826, upload-time = "2025-08-03T05:03:52.568Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e4/3a87854c64b26fcf63a9d1b6f4382bd727d4797c772ceb334a97b7489be9/pyzmq-27.0.1-cp313-cp313t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:313a7b374e3dc64848644ca348a51004b41726f768b02e17e689f1322366a4d9", size = 897283, upload-time = "2025-08-03T05:03:54.167Z" }, + { url = "https://files.pythonhosted.org/packages/17/3e/4296c6b0ad2d07be11ae1395dccf9cae48a0a655cf9be1c3733ad2b591d1/pyzmq-27.0.1-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:119ce8590409702394f959c159d048002cbed2f3c0645ec9d6a88087fc70f0f1", size = 660565, upload-time = "2025-08-03T05:03:56.152Z" }, + { url = "https://files.pythonhosted.org/packages/72/41/a33ba3aa48b45b23c4cd4ac49aafde46f3e0f81939f2bfb3b6171a437122/pyzmq-27.0.1-cp313-cp313t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:45c3e00ce16896ace2cd770ab9057a7cf97d4613ea5f2a13f815141d8b6894b9", size = 847680, upload-time = "2025-08-03T05:03:57.696Z" }, + { url = "https://files.pythonhosted.org/packages/3f/8c/bf2350bb25b3b58d2e5b5d2290ffab0e923f0cc6d02288d3fbf4baa6e4d1/pyzmq-27.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:678e50ec112bdc6df5a83ac259a55a4ba97a8b314c325ab26b3b5b071151bc61", size = 1650151, upload-time = "2025-08-03T05:03:59.387Z" }, + { url = "https://files.pythonhosted.org/packages/f7/1a/a5a07c54890891344a8ddc3d5ab320dd3c4e39febb6e4472546e456d5157/pyzmq-27.0.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d0b96c30be9f9387b18b18b6133c75a7b1b0065da64e150fe1feb5ebf31ece1c", size = 2023766, upload-time = "2025-08-03T05:04:01.883Z" }, + { url = "https://files.pythonhosted.org/packages/62/5e/514dcff08f02c6c8a45a6e23621901139cf853be7ac5ccd0b9407c3aa3de/pyzmq-27.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:88dc92d9eb5ea4968123e74db146d770b0c8d48f0e2bfb1dbc6c50a8edb12d64", size = 1885195, upload-time = "2025-08-03T05:04:03.923Z" }, + { url = "https://files.pythonhosted.org/packages/c8/91/87f74f98a487fbef0b115f6025e4a295129fd56b2b633a03ba7d5816ecc2/pyzmq-27.0.1-cp313-cp313t-win32.whl", hash = "sha256:6dcbcb34f5c9b0cefdfc71ff745459241b7d3cda5b27c7ad69d45afc0821d1e1", size = 574213, upload-time = "2025-08-03T05:04:05.905Z" }, + { url = "https://files.pythonhosted.org/packages/e6/d7/07f7d0d7f4c81e08be7b60e52ff2591c557377c017f96204d33d5fca1b07/pyzmq-27.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b9fd0fda730461f510cfd9a40fafa5355d65f5e3dbdd8d6dfa342b5b3f5d1949", size = 640202, upload-time = "2025-08-03T05:04:07.439Z" }, + { url = "https://files.pythonhosted.org/packages/ab/83/21d66bcef6fb803647a223cbde95111b099e2176277c0cbc8b099c485510/pyzmq-27.0.1-cp313-cp313t-win_arm64.whl", hash = "sha256:56a3b1853f3954ec1f0e91085f1350cc57d18f11205e4ab6e83e4b7c414120e0", size = 561514, upload-time = "2025-08-03T05:04:09.071Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0b/d5ea75cf46b52cdce85a85200c963cb498932953df443892238be49b1a01/pyzmq-27.0.1-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:f98f6b7787bd2beb1f0dde03f23a0621a0c978edf673b7d8f5e7bc039cbe1b60", size = 1340836, upload-time = "2025-08-03T05:04:10.774Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/4c/0dbce882550e17db6846b29e9dc242aea7590e7594e1ca5043e8e58fff2d/pyzmq-27.0.1-cp314-cp314t-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:351bf5d8ca0788ca85327fda45843b6927593ff4c807faee368cc5aaf9f809c2", size = 897236, upload-time = "2025-08-03T05:04:13.221Z" }, + { url = "https://files.pythonhosted.org/packages/1b/22/461e131cf16b8814f3c356fa1ea0912697dbc4c64cddf01f7756ec704c1e/pyzmq-27.0.1-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5268a5a9177afff53dc6d70dffe63114ba2a6e7b20d9411cc3adeba09eeda403", size = 660374, upload-time = "2025-08-03T05:04:15.032Z" }, + { url = "https://files.pythonhosted.org/packages/3f/0c/bbd65a814395bf4fc3e57c6c13af27601c07e4009bdfb75ebcf500537bbd/pyzmq-27.0.1-cp314-cp314t-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4aca06ba295aa78bec9b33ec028d1ca08744c36294338c41432b7171060c808", size = 847497, upload-time = "2025-08-03T05:04:16.967Z" }, + { url = "https://files.pythonhosted.org/packages/1e/df/3d1f4a03b561d824cbd491394f67591957e2f1acf6dc85d96f970312a76a/pyzmq-27.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1c363c6dc66352331d5ad64bb838765c6692766334a6a02fdb05e76bd408ae18", size = 1650028, upload-time = "2025-08-03T05:04:19.398Z" }, + { url = "https://files.pythonhosted.org/packages/41/c9/a3987540f59a412bdaae3f362f78e00e6769557a598c63b7e32956aade5a/pyzmq-27.0.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:87aebf4acd7249bdff8d3df03aed4f09e67078e6762cfe0aecf8d0748ff94cde", size = 2023808, upload-time = "2025-08-03T05:04:21.145Z" }, + { url = "https://files.pythonhosted.org/packages/b0/a5/c388f4cd80498a8eaef7535f2a8eaca0a35b82b87a0b47fa1856fc135004/pyzmq-27.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:e4f22d67756518d71901edf73b38dc0eb4765cce22c8fe122cc81748d425262b", size = 1884970, upload-time = "2025-08-03T05:04:22.908Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/ac/b2a89a1ed90526a1b9a260cdc5cd42f055fd44ee8d2a59902b5ac35ddeb1/pyzmq-27.0.1-cp314-cp314t-win32.whl", hash = "sha256:8c62297bc7aea2147b472ca5ca2b4389377ad82898c87cabab2a94aedd75e337", size = 586905, upload-time = "2025-08-03T05:04:24.492Z" }, + { url = "https://files.pythonhosted.org/packages/68/62/7aa5ea04e836f7a788b2a67405f83011cef59ca76d7bac91d1fc9a0476da/pyzmq-27.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:bee5248d5ec9223545f8cc4f368c2d571477ae828c99409125c3911511d98245", size = 660503, upload-time = "2025-08-03T05:04:26.382Z" }, + { url = "https://files.pythonhosted.org/packages/89/32/3836ed85947b06f1d67c07ce16c00b0cf8c053ab0b249d234f9f81ff95ff/pyzmq-27.0.1-cp314-cp314t-win_arm64.whl", hash = "sha256:0fc24bf45e4a454e55ef99d7f5c8b8712539200ce98533af25a5bfa954b6b390", size = 575098, upload-time = "2025-08-03T05:04:27.974Z" }, +] + +[[package]] +name = "qwen-vl-utils" +version = "0.0.11" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "av" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/9f/1229a40ebd49f689a0252144126f3865f31bb4151e942cf781a2936f0c4d/qwen_vl_utils-0.0.11.tar.gz", hash = "sha256:083ba1e5cfa5002165b1e3bddd4d6d26d1d6d34473884033ef12ae3fe8496cd5", size = 7924, upload-time = "2025-04-21T10:38:47.461Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/c2/ad7f93e1eea4ea0aefd1cc6fbe7a7095fd2f03a4d8fe2c3707e612b0866e/qwen_vl_utils-0.0.11-py3-none-any.whl", hash = "sha256:7fd5287ac04d6c1f01b93bf053b0be236a35149e414c9e864e3cc5bf2fe8cb7b", size = 7584, upload-time = "2025-04-21T10:38:45.595Z" }, +] + +[package.optional-dependencies] +decord = [ + { name = "decord" }, ] [[package]] name = "ray" -version = "2.46.0" +version = "2.49.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -4114,15 +5113,15 @@ dependencies = [ { name = 
"requests" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/a6/39aeebaec26afdae18ead68e6da1f1ea59d14c6e4b869f4b5f0c1d0647d6/ray-2.46.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:d1f37ead29299637144726f809c2e0ff958dd9c0e75930ef614156d6a0a3a57f", size = 68426042, upload-time = "2025-05-07T21:05:33.616Z" }, - { url = "https://files.pythonhosted.org/packages/43/d8/9bdf2980bbaee14d941e1d12edd909416d655d768a0f03150a9c647d07f1/ray-2.46.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b7a064acfeee7f0677d9e3f25daef9c59593559faea764b44a3e2c5331d5d832", size = 65739806, upload-time = "2025-05-07T21:05:40.426Z" }, - { url = "https://files.pythonhosted.org/packages/ef/a8/f5653816755171eb000aae416e916903a6630e747f979bae62d6418097ca/ray-2.46.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:006cbe1a8fdc37664114aa218773100ee891399785e256c202e48958d2dac167", size = 67581031, upload-time = "2025-05-07T21:05:47.971Z" }, - { url = "https://files.pythonhosted.org/packages/21/c3/b2f2f09da4a85a8dcd5a3e63a5707b6c126c5be29bf846aa78dfb1168631/ray-2.46.0-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:5cec1edda93f618ffd2301f81d5398037f03fa9b16825e7e4d8a00ae7a9a4381", size = 68518627, upload-time = "2025-05-07T21:05:55.354Z" }, - { url = "https://files.pythonhosted.org/packages/37/be/9bbfd67580b8a412d2fdb3086440f1183407277316e4213b036a6c4ff4c3/ray-2.46.0-cp312-cp312-win_amd64.whl", hash = "sha256:7d3160f8d187baaea91a86d16a9fd81136cf8607419c94b7a74d66fce774b5c2", size = 25960038, upload-time = "2025-05-07T21:06:03.855Z" }, - { url = "https://files.pythonhosted.org/packages/7b/95/81d71592f294526a8a0ada660e2c452ec6d6523a5fad4f50a765b35ab1e7/ray-2.46.0-cp313-cp313-macosx_10_15_x86_64.whl", hash = "sha256:b2fc2c43ea0a37521193c61ef9a27b6fca8dbab116a58a52fd44344cd73e1ece", size = 68418157, upload-time = "2025-05-07T21:06:14.249Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/ac/300ad38988e17ac0d860fdde662ee3ec9d2f2f5f4f42a4ad7394cac482e0/ray-2.46.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4296dd8c0174256a04ee4b54abe013b6802a45fb85fb7cfdb1375231965d6d4d", size = 65730961, upload-time = "2025-05-07T21:06:21.996Z" }, - { url = "https://files.pythonhosted.org/packages/01/75/29fa07686becd4c61f92f6356bbfcda333bbc060f97e58401d6d19da62cb/ray-2.46.0-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:808daece1f12bd8924b9c6382a0f98da6f5c6886cfb271ed8d89407a89413cd5", size = 67531710, upload-time = "2025-05-07T21:06:33.387Z" }, - { url = "https://files.pythonhosted.org/packages/b2/99/78fa9cb52d4d396af51400c249978881b9bb6febd9dd462c082a398de697/ray-2.46.0-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:a5a28c0a311d2c3221dcf729c40898a6df82466bb5af21e81be0453e09856adf", size = 68471790, upload-time = "2025-05-07T21:06:41.73Z" }, + { url = "https://files.pythonhosted.org/packages/01/66/0d4e518d611486244b357a6cf58a31d7d184f5558e03d5e482c335749616/ray-2.49.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:d6d612de5c6341b776fc75edeee5b698bb4af7ee84a2ff30552b32a9e6e4a772", size = 66857495, upload-time = "2025-09-19T19:15:31.427Z" }, + { url = "https://files.pythonhosted.org/packages/1a/4c/76f2c7c0946645fdd8d286a3e00e2c42130d676286de206be5d60d271218/ray-2.49.2-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:6784e076e4418222ef8ee3b6a8bfeb867d8797803b25bcfcce3bf3bc5414bef1", size = 69262599, upload-time = "2025-09-19T19:15:36.732Z" }, + { url = "https://files.pythonhosted.org/packages/da/99/23b732c0b7b2ee2ffd28bf632257fb98924a03251d251810cb637512fcab/ray-2.49.2-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:dd0d8d8641d142fafe6d83e87d3c19bd5637d21e34608d3ff69ad71ea3e2f462", size = 69287193, upload-time = "2025-09-19T19:15:42.093Z" }, + { url = 
"https://files.pythonhosted.org/packages/69/ca/94791be5c3b68ed0df85589a8ca558334818a47bf2978000f85533245aed/ray-2.49.2-cp312-cp312-manylinux2014_x86_64.whl", hash = "sha256:2ecaaa51f588ccdda2b61563a8be3843bf65dfaaa83a240588a307f4ebb82471", size = 70114942, upload-time = "2025-09-19T19:15:47.536Z" }, + { url = "https://files.pythonhosted.org/packages/e0/22/3f4b77498eefb3152a5946f9f544fcf336e7b9970c5c8af8e2d5eed13f0b/ray-2.49.2-cp312-cp312-win_amd64.whl", hash = "sha256:cba59684f031c9e778c588bc925777967e1b49bab3f00c638e4980bfdab07aec", size = 26223595, upload-time = "2025-09-19T19:15:51.803Z" }, + { url = "https://files.pythonhosted.org/packages/99/dc/a7e569bf7030e0ec50163aed731189e744ca857d74f51b24361ce426697a/ray-2.49.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:2e2fe20fa90562e73630da9ff7932d3ed6507e73291c4d9bdf566537ae9deddf", size = 66803846, upload-time = "2025-09-19T19:15:56.928Z" }, + { url = "https://files.pythonhosted.org/packages/4e/cf/6667e01f39cd28637f082273e9147f16d5f8fff34e2fb0ca60cc5da76e22/ray-2.49.2-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:b2f4f0fed936faf688e87ffdcc9356c034513c00259a2f1a8589e345fcfbdbc0", size = 69208426, upload-time = "2025-09-19T19:16:02.085Z" }, + { url = "https://files.pythonhosted.org/packages/c5/84/5361bcdc9c9fb9f4abbf836801803b7df75c76c16a56493413eb154b8a34/ray-2.49.2-cp313-cp313-manylinux2014_aarch64.whl", hash = "sha256:b4c7869688c518e902f7b6288edec2365ab4d28a464291e6d0a7040c7d01b5f7", size = 69198140, upload-time = "2025-09-19T19:16:07.413Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0c/9e49c3da7502f18483e4deb3273a3104d501c5e9cf1664a136b8ea36df48/ray-2.49.2-cp313-cp313-manylinux2014_x86_64.whl", hash = "sha256:b7d8214cff86df044fec727eeeabccc3bfc9b0271d28d61ba92c09f0d127d01d", size = 70027331, upload-time = "2025-09-19T19:16:12.968Z" }, ] [package.optional-dependencies] @@ -4135,6 +5134,9 @@ default = [ { name = "colorful" }, { name = "grpcio" }, { name = "opencensus" }, + { name = 
"opentelemetry-exporter-prometheus" }, + { name = "opentelemetry-proto" }, + { name = "opentelemetry-sdk" }, { name = "prometheus-client" }, { name = "py-spy" }, { name = "pydantic" }, @@ -4159,45 +5161,57 @@ wheels = [ [[package]] name = "regex" -version = "2024.11.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/5f/bd69653fbfb76cf8604468d3b4ec4c403197144c7bfe0e6a5fc9e02a07cb/regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519", size = 399494, upload-time = "2024-11-06T20:12:31.635Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/30/9a87ce8336b172cc232a0db89a3af97929d06c11ceaa19d97d84fa90a8f8/regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a", size = 483781, upload-time = "2024-11-06T20:10:07.07Z" }, - { url = "https://files.pythonhosted.org/packages/01/e8/00008ad4ff4be8b1844786ba6636035f7ef926db5686e4c0f98093612add/regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9", size = 288455, upload-time = "2024-11-06T20:10:09.117Z" }, - { url = "https://files.pythonhosted.org/packages/60/85/cebcc0aff603ea0a201667b203f13ba75d9fc8668fab917ac5b2de3967bc/regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2", size = 284759, upload-time = "2024-11-06T20:10:11.155Z" }, - { url = "https://files.pythonhosted.org/packages/94/2b/701a4b0585cb05472a4da28ee28fdfe155f3638f5e1ec92306d924e5faf0/regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4", size = 794976, upload-time = "2024-11-06T20:10:13.24Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/bf/fa87e563bf5fee75db8915f7352e1887b1249126a1be4813837f5dbec965/regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577", size = 833077, upload-time = "2024-11-06T20:10:15.37Z" }, - { url = "https://files.pythonhosted.org/packages/a1/56/7295e6bad94b047f4d0834e4779491b81216583c00c288252ef625c01d23/regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3", size = 823160, upload-time = "2024-11-06T20:10:19.027Z" }, - { url = "https://files.pythonhosted.org/packages/fb/13/e3b075031a738c9598c51cfbc4c7879e26729c53aa9cca59211c44235314/regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e", size = 796896, upload-time = "2024-11-06T20:10:21.85Z" }, - { url = "https://files.pythonhosted.org/packages/24/56/0b3f1b66d592be6efec23a795b37732682520b47c53da5a32c33ed7d84e3/regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe", size = 783997, upload-time = "2024-11-06T20:10:24.329Z" }, - { url = "https://files.pythonhosted.org/packages/f9/a1/eb378dada8b91c0e4c5f08ffb56f25fcae47bf52ad18f9b2f33b83e6d498/regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e", size = 781725, upload-time = "2024-11-06T20:10:28.067Z" }, - { url = "https://files.pythonhosted.org/packages/83/f2/033e7dec0cfd6dda93390089864732a3409246ffe8b042e9554afa9bff4e/regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29", size = 789481, upload-time = "2024-11-06T20:10:31.612Z" }, - { url = 
"https://files.pythonhosted.org/packages/83/23/15d4552ea28990a74e7696780c438aadd73a20318c47e527b47a4a5a596d/regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39", size = 852896, upload-time = "2024-11-06T20:10:34.054Z" }, - { url = "https://files.pythonhosted.org/packages/e3/39/ed4416bc90deedbfdada2568b2cb0bc1fdb98efe11f5378d9892b2a88f8f/regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51", size = 860138, upload-time = "2024-11-06T20:10:36.142Z" }, - { url = "https://files.pythonhosted.org/packages/93/2d/dd56bb76bd8e95bbce684326302f287455b56242a4f9c61f1bc76e28360e/regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad", size = 787692, upload-time = "2024-11-06T20:10:38.394Z" }, - { url = "https://files.pythonhosted.org/packages/0b/55/31877a249ab7a5156758246b9c59539abbeba22461b7d8adc9e8475ff73e/regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54", size = 262135, upload-time = "2024-11-06T20:10:40.367Z" }, - { url = "https://files.pythonhosted.org/packages/38/ec/ad2d7de49a600cdb8dd78434a1aeffe28b9d6fc42eb36afab4a27ad23384/regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b", size = 273567, upload-time = "2024-11-06T20:10:43.467Z" }, - { url = "https://files.pythonhosted.org/packages/90/73/bcb0e36614601016552fa9344544a3a2ae1809dc1401b100eab02e772e1f/regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84", size = 483525, upload-time = "2024-11-06T20:10:45.19Z" }, - { url = 
"https://files.pythonhosted.org/packages/0f/3f/f1a082a46b31e25291d830b369b6b0c5576a6f7fb89d3053a354c24b8a83/regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4", size = 288324, upload-time = "2024-11-06T20:10:47.177Z" }, - { url = "https://files.pythonhosted.org/packages/09/c9/4e68181a4a652fb3ef5099e077faf4fd2a694ea6e0f806a7737aff9e758a/regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0", size = 284617, upload-time = "2024-11-06T20:10:49.312Z" }, - { url = "https://files.pythonhosted.org/packages/fc/fd/37868b75eaf63843165f1d2122ca6cb94bfc0271e4428cf58c0616786dce/regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0", size = 795023, upload-time = "2024-11-06T20:10:51.102Z" }, - { url = "https://files.pythonhosted.org/packages/c4/7c/d4cd9c528502a3dedb5c13c146e7a7a539a3853dc20209c8e75d9ba9d1b2/regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7", size = 833072, upload-time = "2024-11-06T20:10:52.926Z" }, - { url = "https://files.pythonhosted.org/packages/4f/db/46f563a08f969159c5a0f0e722260568425363bea43bb7ae370becb66a67/regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7", size = 823130, upload-time = "2024-11-06T20:10:54.828Z" }, - { url = "https://files.pythonhosted.org/packages/db/60/1eeca2074f5b87df394fccaa432ae3fc06c9c9bfa97c5051aed70e6e00c2/regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c", size = 796857, upload-time = "2024-11-06T20:10:56.634Z" }, - { url = 
"https://files.pythonhosted.org/packages/10/db/ac718a08fcee981554d2f7bb8402f1faa7e868c1345c16ab1ebec54b0d7b/regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3", size = 784006, upload-time = "2024-11-06T20:10:59.369Z" }, - { url = "https://files.pythonhosted.org/packages/c2/41/7da3fe70216cea93144bf12da2b87367590bcf07db97604edeea55dac9ad/regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07", size = 781650, upload-time = "2024-11-06T20:11:02.042Z" }, - { url = "https://files.pythonhosted.org/packages/a7/d5/880921ee4eec393a4752e6ab9f0fe28009435417c3102fc413f3fe81c4e5/regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e", size = 789545, upload-time = "2024-11-06T20:11:03.933Z" }, - { url = "https://files.pythonhosted.org/packages/dc/96/53770115e507081122beca8899ab7f5ae28ae790bfcc82b5e38976df6a77/regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6", size = 853045, upload-time = "2024-11-06T20:11:06.497Z" }, - { url = "https://files.pythonhosted.org/packages/31/d3/1372add5251cc2d44b451bd94f43b2ec78e15a6e82bff6a290ef9fd8f00a/regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4", size = 860182, upload-time = "2024-11-06T20:11:09.06Z" }, - { url = "https://files.pythonhosted.org/packages/ed/e3/c446a64984ea9f69982ba1a69d4658d5014bc7a0ea468a07e1a1265db6e2/regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d", size = 787733, upload-time = "2024-11-06T20:11:11.256Z" }, - { url = 
"https://files.pythonhosted.org/packages/2b/f1/e40c8373e3480e4f29f2692bd21b3e05f296d3afebc7e5dcf21b9756ca1c/regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff", size = 262122, upload-time = "2024-11-06T20:11:13.161Z" }, - { url = "https://files.pythonhosted.org/packages/45/94/bc295babb3062a731f52621cdc992d123111282e291abaf23faa413443ea/regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a", size = 273545, upload-time = "2024-11-06T20:11:15Z" }, +version = "2025.7.34" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/de/e13fa6dc61d78b30ba47481f99933a3b49a57779d625c392d8036770a60d/regex-2025.7.34.tar.gz", hash = "sha256:9ead9765217afd04a86822dfcd4ed2747dfe426e887da413b15ff0ac2457e21a", size = 400714, upload-time = "2025-07-31T00:21:16.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ff/f0/31d62596c75a33f979317658e8d261574785c6cd8672c06741ce2e2e2070/regex-2025.7.34-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7f7211a746aced993bef487de69307a38c5ddd79257d7be83f7b202cb59ddb50", size = 485492, upload-time = "2025-07-31T00:19:35.57Z" }, + { url = "https://files.pythonhosted.org/packages/d8/16/b818d223f1c9758c3434be89aa1a01aae798e0e0df36c1f143d1963dd1ee/regex-2025.7.34-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fb31080f2bd0681484b275461b202b5ad182f52c9ec606052020fe13eb13a72f", size = 290000, upload-time = "2025-07-31T00:19:37.175Z" }, + { url = "https://files.pythonhosted.org/packages/cd/70/69506d53397b4bd6954061bae75677ad34deb7f6ca3ba199660d6f728ff5/regex-2025.7.34-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0200a5150c4cf61e407038f4b4d5cdad13e86345dac29ff9dab3d75d905cf130", size = 286072, upload-time = "2025-07-31T00:19:38.612Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/73/536a216d5f66084fb577bb0543b5cb7de3272eb70a157f0c3a542f1c2551/regex-2025.7.34-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:739a74970e736df0773788377969c9fea3876c2fc13d0563f98e5503e5185f46", size = 797341, upload-time = "2025-07-31T00:19:40.119Z" }, + { url = "https://files.pythonhosted.org/packages/26/af/733f8168449e56e8f404bb807ea7189f59507cbea1b67a7bbcd92f8bf844/regex-2025.7.34-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4fef81b2f7ea6a2029161ed6dea9ae13834c28eb5a95b8771828194a026621e4", size = 862556, upload-time = "2025-07-31T00:19:41.556Z" }, + { url = "https://files.pythonhosted.org/packages/19/dd/59c464d58c06c4f7d87de4ab1f590e430821345a40c5d345d449a636d15f/regex-2025.7.34-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ea74cf81fe61a7e9d77989050d0089a927ab758c29dac4e8e1b6c06fccf3ebf0", size = 910762, upload-time = "2025-07-31T00:19:43Z" }, + { url = "https://files.pythonhosted.org/packages/37/a8/b05ccf33ceca0815a1e253693b2c86544932ebcc0049c16b0fbdf18b688b/regex-2025.7.34-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4636a7f3b65a5f340ed9ddf53585c42e3ff37101d383ed321bfe5660481744b", size = 801892, upload-time = "2025-07-31T00:19:44.645Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9a/b993cb2e634cc22810afd1652dba0cae156c40d4864285ff486c73cd1996/regex-2025.7.34-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cef962d7834437fe8d3da6f9bfc6f93f20f218266dcefec0560ed7765f5fe01", size = 786551, upload-time = "2025-07-31T00:19:46.127Z" }, + { url = "https://files.pythonhosted.org/packages/2d/79/7849d67910a0de4e26834b5bb816e028e35473f3d7ae563552ea04f58ca2/regex-2025.7.34-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:cbe1698e5b80298dbce8df4d8d1182279fbdaf1044e864cbc9d53c20e4a2be77", size = 
856457, upload-time = "2025-07-31T00:19:47.562Z" }, + { url = "https://files.pythonhosted.org/packages/91/c6/de516bc082524b27e45cb4f54e28bd800c01efb26d15646a65b87b13a91e/regex-2025.7.34-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:32b9f9bcf0f605eb094b08e8da72e44badabb63dde6b83bd530580b488d1c6da", size = 848902, upload-time = "2025-07-31T00:19:49.312Z" }, + { url = "https://files.pythonhosted.org/packages/7d/22/519ff8ba15f732db099b126f039586bd372da6cd4efb810d5d66a5daeda1/regex-2025.7.34-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:524c868ba527eab4e8744a9287809579f54ae8c62fbf07d62aacd89f6026b282", size = 788038, upload-time = "2025-07-31T00:19:50.794Z" }, + { url = "https://files.pythonhosted.org/packages/3f/7d/aabb467d8f57d8149895d133c88eb809a1a6a0fe262c1d508eb9dfabb6f9/regex-2025.7.34-cp312-cp312-win32.whl", hash = "sha256:d600e58ee6d036081c89696d2bdd55d507498a7180df2e19945c6642fac59588", size = 264417, upload-time = "2025-07-31T00:19:52.292Z" }, + { url = "https://files.pythonhosted.org/packages/3b/39/bd922b55a4fc5ad5c13753274e5b536f5b06ec8eb9747675668491c7ab7a/regex-2025.7.34-cp312-cp312-win_amd64.whl", hash = "sha256:9a9ab52a466a9b4b91564437b36417b76033e8778e5af8f36be835d8cb370d62", size = 275387, upload-time = "2025-07-31T00:19:53.593Z" }, + { url = "https://files.pythonhosted.org/packages/f7/3c/c61d2fdcecb754a40475a3d1ef9a000911d3e3fc75c096acf44b0dfb786a/regex-2025.7.34-cp312-cp312-win_arm64.whl", hash = "sha256:c83aec91af9c6fbf7c743274fd952272403ad9a9db05fe9bfc9df8d12b45f176", size = 268482, upload-time = "2025-07-31T00:19:55.183Z" }, + { url = "https://files.pythonhosted.org/packages/15/16/b709b2119975035169a25aa8e4940ca177b1a2e25e14f8d996d09130368e/regex-2025.7.34-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c3c9740a77aeef3f5e3aaab92403946a8d34437db930a0280e7e81ddcada61f5", size = 485334, upload-time = "2025-07-31T00:19:56.58Z" }, + { url = 
"https://files.pythonhosted.org/packages/94/a6/c09136046be0595f0331bc58a0e5f89c2d324cf734e0b0ec53cf4b12a636/regex-2025.7.34-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:69ed3bc611540f2ea70a4080f853741ec698be556b1df404599f8724690edbcd", size = 289942, upload-time = "2025-07-31T00:19:57.943Z" }, + { url = "https://files.pythonhosted.org/packages/36/91/08fc0fd0f40bdfb0e0df4134ee37cfb16e66a1044ac56d36911fd01c69d2/regex-2025.7.34-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d03c6f9dcd562c56527c42b8530aad93193e0b3254a588be1f2ed378cdfdea1b", size = 285991, upload-time = "2025-07-31T00:19:59.837Z" }, + { url = "https://files.pythonhosted.org/packages/be/2f/99dc8f6f756606f0c214d14c7b6c17270b6bbe26d5c1f05cde9dbb1c551f/regex-2025.7.34-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6164b1d99dee1dfad33f301f174d8139d4368a9fb50bf0a3603b2eaf579963ad", size = 797415, upload-time = "2025-07-31T00:20:01.668Z" }, + { url = "https://files.pythonhosted.org/packages/62/cf/2fcdca1110495458ba4e95c52ce73b361cf1cafd8a53b5c31542cde9a15b/regex-2025.7.34-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1e4f4f62599b8142362f164ce776f19d79bdd21273e86920a7b604a4275b4f59", size = 862487, upload-time = "2025-07-31T00:20:03.142Z" }, + { url = "https://files.pythonhosted.org/packages/90/38/899105dd27fed394e3fae45607c1983e138273ec167e47882fc401f112b9/regex-2025.7.34-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:72a26dcc6a59c057b292f39d41465d8233a10fd69121fa24f8f43ec6294e5415", size = 910717, upload-time = "2025-07-31T00:20:04.727Z" }, + { url = "https://files.pythonhosted.org/packages/ee/f6/4716198dbd0bcc9c45625ac4c81a435d1c4d8ad662e8576dac06bab35b17/regex-2025.7.34-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d5273fddf7a3e602695c92716c420c377599ed3c853ea669c1fe26218867002f", size = 
801943, upload-time = "2025-07-31T00:20:07.1Z" }, + { url = "https://files.pythonhosted.org/packages/40/5d/cff8896d27e4e3dd11dd72ac78797c7987eb50fe4debc2c0f2f1682eb06d/regex-2025.7.34-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c1844be23cd40135b3a5a4dd298e1e0c0cb36757364dd6cdc6025770363e06c1", size = 786664, upload-time = "2025-07-31T00:20:08.818Z" }, + { url = "https://files.pythonhosted.org/packages/10/29/758bf83cf7b4c34f07ac3423ea03cee3eb3176941641e4ccc05620f6c0b8/regex-2025.7.34-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dde35e2afbbe2272f8abee3b9fe6772d9b5a07d82607b5788e8508974059925c", size = 856457, upload-time = "2025-07-31T00:20:10.328Z" }, + { url = "https://files.pythonhosted.org/packages/d7/30/c19d212b619963c5b460bfed0ea69a092c6a43cba52a973d46c27b3e2975/regex-2025.7.34-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f3f6e8e7af516a7549412ce57613e859c3be27d55341a894aacaa11703a4c31a", size = 849008, upload-time = "2025-07-31T00:20:11.823Z" }, + { url = "https://files.pythonhosted.org/packages/9e/b8/3c35da3b12c87e3cc00010ef6c3a4ae787cff0bc381aa3d251def219969a/regex-2025.7.34-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:469142fb94a869beb25b5f18ea87646d21def10fbacb0bcb749224f3509476f0", size = 788101, upload-time = "2025-07-31T00:20:13.729Z" }, + { url = "https://files.pythonhosted.org/packages/47/80/2f46677c0b3c2b723b2c358d19f9346e714113865da0f5f736ca1a883bde/regex-2025.7.34-cp313-cp313-win32.whl", hash = "sha256:da7507d083ee33ccea1310447410c27ca11fb9ef18c95899ca57ff60a7e4d8f1", size = 264401, upload-time = "2025-07-31T00:20:15.233Z" }, + { url = "https://files.pythonhosted.org/packages/be/fa/917d64dd074682606a003cba33585c28138c77d848ef72fc77cbb1183849/regex-2025.7.34-cp313-cp313-win_amd64.whl", hash = "sha256:9d644de5520441e5f7e2db63aec2748948cc39ed4d7a87fd5db578ea4043d997", size = 275368, upload-time = "2025-07-31T00:20:16.711Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/cd/f94383666704170a2154a5df7b16be28f0c27a266bffcd843e58bc84120f/regex-2025.7.34-cp313-cp313-win_arm64.whl", hash = "sha256:7bf1c5503a9f2cbd2f52d7e260acb3131b07b6273c470abb78568174fe6bde3f", size = 268482, upload-time = "2025-07-31T00:20:18.189Z" }, + { url = "https://files.pythonhosted.org/packages/ac/23/6376f3a23cf2f3c00514b1cdd8c990afb4dfbac3cb4a68b633c6b7e2e307/regex-2025.7.34-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:8283afe7042d8270cecf27cca558873168e771183d4d593e3c5fe5f12402212a", size = 485385, upload-time = "2025-07-31T00:20:19.692Z" }, + { url = "https://files.pythonhosted.org/packages/73/5b/6d4d3a0b4d312adbfd6d5694c8dddcf1396708976dd87e4d00af439d962b/regex-2025.7.34-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:6c053f9647e3421dd2f5dff8172eb7b4eec129df9d1d2f7133a4386319b47435", size = 289788, upload-time = "2025-07-31T00:20:21.941Z" }, + { url = "https://files.pythonhosted.org/packages/92/71/5862ac9913746e5054d01cb9fb8125b3d0802c0706ef547cae1e7f4428fa/regex-2025.7.34-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a16dd56bbcb7d10e62861c3cd000290ddff28ea142ffb5eb3470f183628011ac", size = 286136, upload-time = "2025-07-31T00:20:26.146Z" }, + { url = "https://files.pythonhosted.org/packages/27/df/5b505dc447eb71278eba10d5ec940769ca89c1af70f0468bfbcb98035dc2/regex-2025.7.34-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69c593ff5a24c0d5c1112b0df9b09eae42b33c014bdca7022d6523b210b69f72", size = 797753, upload-time = "2025-07-31T00:20:27.919Z" }, + { url = "https://files.pythonhosted.org/packages/86/38/3e3dc953d13998fa047e9a2414b556201dbd7147034fbac129392363253b/regex-2025.7.34-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:98d0ce170fcde1a03b5df19c5650db22ab58af375aaa6ff07978a85c9f250f0e", size = 863263, upload-time = "2025-07-31T00:20:29.803Z" }, + { url = 
"https://files.pythonhosted.org/packages/68/e5/3ff66b29dde12f5b874dda2d9dec7245c2051f2528d8c2a797901497f140/regex-2025.7.34-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d72765a4bff8c43711d5b0f5b452991a9947853dfa471972169b3cc0ba1d0751", size = 910103, upload-time = "2025-07-31T00:20:31.313Z" }, + { url = "https://files.pythonhosted.org/packages/9e/fe/14176f2182125977fba3711adea73f472a11f3f9288c1317c59cd16ad5e6/regex-2025.7.34-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4494f8fd95a77eb434039ad8460e64d57baa0434f1395b7da44015bef650d0e4", size = 801709, upload-time = "2025-07-31T00:20:33.323Z" }, + { url = "https://files.pythonhosted.org/packages/5a/0d/80d4e66ed24f1ba876a9e8e31b709f9fd22d5c266bf5f3ab3c1afe683d7d/regex-2025.7.34-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4f42b522259c66e918a0121a12429b2abcf696c6f967fa37bdc7b72e61469f98", size = 786726, upload-time = "2025-07-31T00:20:35.252Z" }, + { url = "https://files.pythonhosted.org/packages/12/75/c3ebb30e04a56c046f5c85179dc173818551037daae2c0c940c7b19152cb/regex-2025.7.34-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:aaef1f056d96a0a5d53ad47d019d5b4c66fe4be2da87016e0d43b7242599ffc7", size = 857306, upload-time = "2025-07-31T00:20:37.12Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b2/a4dc5d8b14f90924f27f0ac4c4c4f5e195b723be98adecc884f6716614b6/regex-2025.7.34-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:656433e5b7dccc9bc0da6312da8eb897b81f5e560321ec413500e5367fcd5d47", size = 848494, upload-time = "2025-07-31T00:20:38.818Z" }, + { url = "https://files.pythonhosted.org/packages/0d/21/9ac6e07a4c5e8646a90b56b61f7e9dac11ae0747c857f91d3d2bc7c241d9/regex-2025.7.34-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e91eb2c62c39705e17b4d42d4b86c4e86c884c0d15d9c5a47d0835f8387add8e", size = 787850, upload-time = "2025-07-31T00:20:40.478Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/6c/d51204e28e7bc54f9a03bb799b04730d7e54ff2718862b8d4e09e7110a6a/regex-2025.7.34-cp314-cp314-win32.whl", hash = "sha256:f978ddfb6216028c8f1d6b0f7ef779949498b64117fc35a939022f67f810bdcb", size = 269730, upload-time = "2025-07-31T00:20:42.253Z" }, + { url = "https://files.pythonhosted.org/packages/74/52/a7e92d02fa1fdef59d113098cb9f02c5d03289a0e9f9e5d4d6acccd10677/regex-2025.7.34-cp314-cp314-win_amd64.whl", hash = "sha256:4b7dc33b9b48fb37ead12ffc7bdb846ac72f99a80373c4da48f64b373a7abeae", size = 278640, upload-time = "2025-07-31T00:20:44.42Z" }, + { url = "https://files.pythonhosted.org/packages/d1/78/a815529b559b1771080faa90c3ab401730661f99d495ab0071649f139ebd/regex-2025.7.34-cp314-cp314-win_arm64.whl", hash = "sha256:4b8c4d39f451e64809912c82392933d80fe2e4a87eeef8859fcc5380d0173c64", size = 271757, upload-time = "2025-07-31T00:20:46.355Z" }, ] [[package]] name = "requests" -version = "2.32.4" +version = "2.32.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, @@ -4205,36 +5219,82 @@ dependencies = [ { name = "idna" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517, upload-time = "2025-08-18T20:46:02.573Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = 
"2025-06-09T16:43:05.728Z" }, + { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] [[package]] name = "rich" -version = "14.0.0" +version = "13.9.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078, upload-time = "2025-03-30T14:15:14.23Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/9b/63f4c7ebc259242c89b3acafdb37b41d1185c07ff0011164674e9076b491/rich-14.0.0-py3-none-any.whl", hash = "sha256:1c9491e1951aac09caffd42f448ee3d04e58923ffe14993f6e83068dc395d7e0", size = 243229, upload-time = "2025-03-30T14:15:12.283Z" }, + { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload-time = "2024-11-01T16:43:55.817Z" }, ] [[package]] name = "rich-toolkit" -version = "0.14.7" +version = "0.15.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "rich" }, { name = "typing-extensions" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/5b/7a/cb48b7024b247631ce39b1f14a0f1abedf311fb27b892b0e0387d809d4b5/rich_toolkit-0.14.7.tar.gz", hash = "sha256:6cca5a68850cc5778915f528eb785662c27ba3b4b2624612cce8340fa9701c5e", size = 104977, upload-time = "2025-05-27T15:48:09.377Z" } +sdist = { url = "https://files.pythonhosted.org/packages/65/36/cdb3d51371ad0cccbf1541506304783bd72d55790709b8eb68c0d401a13a/rich_toolkit-0.15.0.tar.gz", hash = "sha256:3f5730e9f2d36d0bfe01cf723948b7ecf4cc355d2b71e2c00e094f7963128c09", size = 115118, upload-time = "2025-08-11T10:55:37.909Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/2e/95fde5b818dac9a37683ea064096323f593442d0f6358923c5f635974393/rich_toolkit-0.14.7-py3-none-any.whl", hash = "sha256:def05cc6e0f1176d6263b6a26648f16a62c4563b277ca2f8538683acdba1e0da", size = 24870, upload-time = "2025-05-27T15:48:07.942Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/b0794eefb3cf78566b15e5bf576492c1d4a92ce5f6da55675bc11e9ef5d8/rich_toolkit-0.15.0-py3-none-any.whl", hash = "sha256:ddb91008283d4a7989fd8ff0324a48773a7a2276229c6a3070755645538ef1bb", size = 29062, upload-time = "2025-08-11T10:55:37.152Z" }, +] + +[[package]] +name = "rignore" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/46/05a94dc55ac03cf931d18e43b86ecee5ee054cb88b7853fffd741e35009c/rignore-0.6.4.tar.gz", hash = "sha256:e893fdd2d7fdcfa9407d0b7600ef2c2e2df97f55e1c45d4a8f54364829ddb0ab", size = 11633, upload-time = "2025-07-19T19:24:46.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/6c/e5af4383cdd7829ef9aa63ac82a6507983e02dbc7c2e7b9aa64b7b8e2c7a/rignore-0.6.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:74720d074b79f32449d5d212ce732e0144a294a184246d1f1e7bcc1fc5c83b69", size = 885885, upload-time = "2025-07-19T19:23:53.236Z" }, + { url = 
"https://files.pythonhosted.org/packages/89/3e/1b02a868830e464769aa417ee195ac352fe71ff818df8ce50c4b998edb9c/rignore-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0a8184fcf567bd6b6d7b85a0c138d98dd40f63054141c96b175844414c5530d7", size = 819736, upload-time = "2025-07-19T19:23:46.565Z" }, + { url = "https://files.pythonhosted.org/packages/e0/75/b9be0c523d97c09f3c6508a67ce376aba4efe41c333c58903a0d7366439a/rignore-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bcb0d7d7ecc3fbccf6477bb187c04a091579ea139f15f139abe0b3b48bdfef69", size = 892779, upload-time = "2025-07-19T19:22:35.167Z" }, + { url = "https://files.pythonhosted.org/packages/91/f4/3064b06233697f2993485d132f06fe95061fef71631485da75aed246c4fd/rignore-0.6.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:feac73377a156fb77b3df626c76f7e5893d9b4e9e886ac8c0f9d44f1206a2a91", size = 872116, upload-time = "2025-07-19T19:22:47.828Z" }, + { url = "https://files.pythonhosted.org/packages/99/94/cb8e7af9a3c0a665f10e2366144e0ebc66167cf846aca5f1ac31b3661598/rignore-0.6.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:465179bc30beb1f7a3439e428739a2b5777ed26660712b8c4e351b15a7c04483", size = 1163345, upload-time = "2025-07-19T19:23:00.557Z" }, + { url = "https://files.pythonhosted.org/packages/86/6b/49faa7ad85ceb6ccef265df40091d9992232d7f6055fa664fe0a8b13781c/rignore-0.6.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4a4877b4dca9cf31a4d09845b300c677c86267657540d0b4d3e6d0ce3110e6e9", size = 939967, upload-time = "2025-07-19T19:23:13.494Z" }, + { url = "https://files.pythonhosted.org/packages/80/c8/b91afda10bd5ca1e3a80463340b899c0dc26a7750a9f3c94f668585c7f40/rignore-0.6.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:456456802b1e77d1e2d149320ee32505b8183e309e228129950b807d204ddd17", size = 949717, upload-time = "2025-07-19T19:23:36.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/3f/f1/88bfdde58ae3fb1c1a92bb801f492eea8eafcdaf05ab9b75130023a4670b/rignore-0.6.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4c1ff2fc223f1d9473d36923160af37bf765548578eb9d47a2f52e90da8ae408", size = 975534, upload-time = "2025-07-19T19:23:25.988Z" }, + { url = "https://files.pythonhosted.org/packages/aa/8f/a80b4a2e48ceba56ba19e096d41263d844757e10aa36ede212571b5d8117/rignore-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e445fbc214ae18e0e644a78086ea5d0f579e210229a4fbe86367d11a4cd03c11", size = 1067837, upload-time = "2025-07-19T19:23:59.888Z" }, + { url = "https://files.pythonhosted.org/packages/7d/90/0905597af0e78748909ef58418442a480ddd93e9fc89b0ca9ab170c357c0/rignore-0.6.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e07d9c5270fc869bc431aadcfb6ed0447f89b8aafaa666914c077435dc76a123", size = 1134959, upload-time = "2025-07-19T19:24:12.396Z" }, + { url = "https://files.pythonhosted.org/packages/cc/7d/0fa29adf9183b61947ce6dc8a1a9779a8ea16573f557be28ec893f6ddbaa/rignore-0.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7a6ccc0ea83d2c0c6df6b166f2acacedcc220a516436490f41e99a5ae73b6019", size = 1109708, upload-time = "2025-07-19T19:24:24.176Z" }, + { url = "https://files.pythonhosted.org/packages/4e/a7/92892ed86b2e36da403dd3a0187829f2d880414cef75bd612bfdf4dedebc/rignore-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:536392c5ec91755db48389546c833c4ab1426fe03e5a8522992b54ef8a244e7e", size = 1120546, upload-time = "2025-07-19T19:24:36.377Z" }, + { url = "https://files.pythonhosted.org/packages/31/1b/d29ae1fe901d523741d6d1d3ffe0d630734dd0ed6b047628a69c1e15ea44/rignore-0.6.4-cp312-cp312-win32.whl", hash = "sha256:f5f9dca46fc41c0a1e236767f68be9d63bdd2726db13a0ae3a30f68414472969", size = 642005, upload-time = "2025-07-19T19:24:56.671Z" }, + { url = 
"https://files.pythonhosted.org/packages/1a/41/a224944824688995374e4525115ce85fecd82442fc85edd5bcd81f4f256d/rignore-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:e02eecb9e1b9f9bf7c9030ae73308a777bed3b2486204cc74dfcfbe699ab1497", size = 720358, upload-time = "2025-07-19T19:24:49.959Z" }, + { url = "https://files.pythonhosted.org/packages/db/a3/edd7d0d5cc0720de132b6651cef95ee080ce5fca11c77d8a47db848e5f90/rignore-0.6.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2b3b1e266ce45189240d14dfa1057f8013ea34b9bc8b3b44125ec8d25fdb3985", size = 885304, upload-time = "2025-07-19T19:23:54.268Z" }, + { url = "https://files.pythonhosted.org/packages/93/a1/d8d2fb97a6548307507d049b7e93885d4a0dfa1c907af5983fd9f9362a21/rignore-0.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:45fe803628cc14714df10e8d6cdc23950a47eb9eb37dfea9a4779f4c672d2aa0", size = 818799, upload-time = "2025-07-19T19:23:47.544Z" }, + { url = "https://files.pythonhosted.org/packages/b1/cd/949981fcc180ad5ba7b31c52e78b74b2dea6b7bf744ad4c0c4b212f6da78/rignore-0.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e439f034277a947a4126e2da79dbb43e33d73d7c09d3d72a927e02f8a16f59aa", size = 892024, upload-time = "2025-07-19T19:22:36.18Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d3/9042d701a8062d9c88f87760bbc2695ee2c23b3f002d34486b72a85f8efe/rignore-0.6.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:84b5121650ae24621154c7bdba8b8970b0739d8146505c9f38e0cda9385d1004", size = 871430, upload-time = "2025-07-19T19:22:49.62Z" }, + { url = "https://files.pythonhosted.org/packages/eb/50/3370249b984212b7355f3d9241aa6d02e706067c6d194a2614dfbc0f5b27/rignore-0.6.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52b0957b585ab48a445cf8ac1dbc33a272ab060835e583b4f95aa8c67c23fb2b", size = 1160559, upload-time = "2025-07-19T19:23:01.629Z" }, + { url = 
"https://files.pythonhosted.org/packages/6c/6f/2ad7f925838091d065524f30a8abda846d1813eee93328febf262b5cda21/rignore-0.6.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50359e0d5287b5e2743bd2f2fbf05df619c8282fd3af12f6628ff97b9675551d", size = 939947, upload-time = "2025-07-19T19:23:14.608Z" }, + { url = "https://files.pythonhosted.org/packages/1f/01/626ec94d62475ae7ef8b00ef98cea61cbea52a389a666703c97c4673d406/rignore-0.6.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efe18096dcb1596757dfe0b412aab6d32564473ae7ee58dea0a8b4be5b1a2e3b", size = 949471, upload-time = "2025-07-19T19:23:37.521Z" }, + { url = "https://files.pythonhosted.org/packages/e8/c3/699c4f03b3c46f4b5c02f17a0a339225da65aad547daa5b03001e7c6a382/rignore-0.6.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b79c212d9990a273ad91e8d9765e1766ef6ecedd3be65375d786a252762ba385", size = 974912, upload-time = "2025-07-19T19:23:27.13Z" }, + { url = "https://files.pythonhosted.org/packages/cd/35/04626c12f9f92a9fc789afc2be32838a5d9b23b6fa8b2ad4a8625638d15b/rignore-0.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6ffa7f2a8894c65aa5dc4e8ac8bbdf39a326c0c6589efd27686cfbb48f0197d", size = 1067281, upload-time = "2025-07-19T19:24:01.016Z" }, + { url = "https://files.pythonhosted.org/packages/fe/9c/8f17baf3b984afea151cb9094716f6f1fb8e8737db97fc6eb6d494bd0780/rignore-0.6.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a63f5720dffc8d8fb0a4d02fafb8370a4031ebf3f99a4e79f334a91e905b7349", size = 1134414, upload-time = "2025-07-19T19:24:13.534Z" }, + { url = "https://files.pythonhosted.org/packages/10/88/ef84ffa916a96437c12cefcc39d474122da9626d75e3a2ebe09ec5d32f1b/rignore-0.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ce33982da47ac5dc09d19b04fa8d7c9aa6292fc0bd1ecf33076989faa8886094", size = 1109330, upload-time = "2025-07-19T19:24:25.303Z" }, + { url = 
"https://files.pythonhosted.org/packages/27/43/2ada5a2ec03b82e903610a1c483f516f78e47700ee6db9823f739e08b3af/rignore-0.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d899621867aa266824fbd9150e298f19d25b93903ef0133c09f70c65a3416eca", size = 1120381, upload-time = "2025-07-19T19:24:37.798Z" }, + { url = "https://files.pythonhosted.org/packages/3b/99/e7bcc643085131cb14dbea772def72bf1f6fe9037171ebe177c4f228abc8/rignore-0.6.4-cp313-cp313-win32.whl", hash = "sha256:d0615a6bf4890ec5a90b5fb83666822088fbd4e8fcd740c386fcce51e2f6feea", size = 641761, upload-time = "2025-07-19T19:24:58.096Z" }, + { url = "https://files.pythonhosted.org/packages/d9/25/7798908044f27dea1a8abdc75c14523e33770137651e5f775a15143f4218/rignore-0.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:145177f0e32716dc2f220b07b3cde2385b994b7ea28d5c96fbec32639e9eac6f", size = 719876, upload-time = "2025-07-19T19:24:51.125Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e3/ae1e30b045bf004ad77bbd1679b9afff2be8edb166520921c6f29420516a/rignore-0.6.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e55bf8f9bbd186f58ab646b4a08718c77131d28a9004e477612b0cbbd5202db2", size = 891776, upload-time = "2025-07-19T19:22:37.78Z" }, + { url = "https://files.pythonhosted.org/packages/45/a9/1193e3bc23ca0e6eb4f17cf4b99971237f97cfa6f241d98366dff90a6d09/rignore-0.6.4-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2521f7bf3ee1f2ab22a100a3a4eed39a97b025804e5afe4323528e9ce8f084a5", size = 871442, upload-time = "2025-07-19T19:22:50.972Z" }, + { url = "https://files.pythonhosted.org/packages/20/83/4c52ae429a0b2e1ce667e35b480e9a6846f9468c443baeaed5d775af9485/rignore-0.6.4-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0cc35773a8a9c119359ef974d0856988d4601d4daa6f532c05f66b4587cf35bc", size = 1159844, upload-time = "2025-07-19T19:23:02.751Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/2f/c740f5751f464c937bfe252dc15a024ae081352cfe80d94aa16d6a617482/rignore-0.6.4-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b665b1ea14457d7b49e834baabc635a3b8c10cfb5cca5c21161fabdbfc2b850e", size = 939456, upload-time = "2025-07-19T19:23:15.72Z" }, + { url = "https://files.pythonhosted.org/packages/fc/dd/68dbb08ac0edabf44dd144ff546a3fb0253c5af708e066847df39fc9188f/rignore-0.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c7fd339f344a8548724f289495b835bed7b81174a0bc1c28c6497854bd8855db", size = 1067070, upload-time = "2025-07-19T19:24:02.803Z" }, + { url = "https://files.pythonhosted.org/packages/3b/3a/7e7ea6f0d31d3f5beb0f2cf2c4c362672f5f7f125714458673fc579e2bed/rignore-0.6.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:91dc94b1cc5af8d6d25ce6edd29e7351830f19b0a03b75cb3adf1f76d00f3007", size = 1134598, upload-time = "2025-07-19T19:24:15.039Z" }, + { url = "https://files.pythonhosted.org/packages/7e/06/1b3307f6437d29bede5a95738aa89e6d910ba68d4054175c9f60d8e2c6b1/rignore-0.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4d1918221a249e5342b60fd5fa513bf3d6bf272a8738e66023799f0c82ecd788", size = 1108862, upload-time = "2025-07-19T19:24:26.765Z" }, + { url = "https://files.pythonhosted.org/packages/b0/d5/b37c82519f335f2c472a63fc6215c6f4c51063ecf3166e3acf508011afbd/rignore-0.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:240777332b859dc89dcba59ab6e3f1e062bc8e862ffa3e5f456e93f7fd5cb415", size = 1120002, upload-time = "2025-07-19T19:24:38.952Z" }, + { url = "https://files.pythonhosted.org/packages/ac/72/2f05559ed5e69bdfdb56ea3982b48e6c0017c59f7241f7e1c5cae992b347/rignore-0.6.4-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b0e548753e55cc648f1e7b02d9f74285fe48bb49cec93643d31e563773ab3f", size = 949454, upload-time = "2025-07-19T19:23:38.664Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/92/186693c8f838d670510ac1dfb35afbe964320fbffb343ba18f3d24441941/rignore-0.6.4-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6971ac9fdd5a0bd299a181096f091c4f3fd286643adceba98eccc03c688a6637", size = 974663, upload-time = "2025-07-19T19:23:28.24Z" }, ] [[package]] @@ -4248,51 +5308,83 @@ wheels = [ [[package]] name = "rpds-py" -version = "0.25.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8c/a6/60184b7fc00dd3ca80ac635dd5b8577d444c57e8e8742cecabfacb829921/rpds_py-0.25.1.tar.gz", hash = "sha256:8960b6dac09b62dac26e75d7e2c4a22efb835d827a7278c34f72b2b84fa160e3", size = 27304, upload-time = "2025-05-21T12:46:12.502Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/81/28ab0408391b1dc57393653b6a0cf2014cc282cc2909e4615e63e58262be/rpds_py-0.25.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:b5ffe453cde61f73fea9430223c81d29e2fbf412a6073951102146c84e19e34c", size = 364647, upload-time = "2025-05-21T12:43:28.559Z" }, - { url = "https://files.pythonhosted.org/packages/2c/9a/7797f04cad0d5e56310e1238434f71fc6939d0bc517192a18bb99a72a95f/rpds_py-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:115874ae5e2fdcfc16b2aedc95b5eef4aebe91b28e7e21951eda8a5dc0d3461b", size = 350454, upload-time = "2025-05-21T12:43:30.615Z" }, - { url = "https://files.pythonhosted.org/packages/69/3c/93d2ef941b04898011e5d6eaa56a1acf46a3b4c9f4b3ad1bbcbafa0bee1f/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a714bf6e5e81b0e570d01f56e0c89c6375101b8463999ead3a93a5d2a4af91fa", size = 389665, upload-time = "2025-05-21T12:43:32.629Z" }, - { url = "https://files.pythonhosted.org/packages/c1/57/ad0e31e928751dde8903a11102559628d24173428a0f85e25e187defb2c1/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35634369325906bcd01577da4c19e3b9541a15e99f31e91a02d010816b49bfda", 
size = 403873, upload-time = "2025-05-21T12:43:34.576Z" }, - { url = "https://files.pythonhosted.org/packages/16/ad/c0c652fa9bba778b4f54980a02962748479dc09632e1fd34e5282cf2556c/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4cb2b3ddc16710548801c6fcc0cfcdeeff9dafbc983f77265877793f2660309", size = 525866, upload-time = "2025-05-21T12:43:36.123Z" }, - { url = "https://files.pythonhosted.org/packages/2a/39/3e1839bc527e6fcf48d5fec4770070f872cdee6c6fbc9b259932f4e88a38/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9ceca1cf097ed77e1a51f1dbc8d174d10cb5931c188a4505ff9f3e119dfe519b", size = 416886, upload-time = "2025-05-21T12:43:38.034Z" }, - { url = "https://files.pythonhosted.org/packages/7a/95/dd6b91cd4560da41df9d7030a038298a67d24f8ca38e150562644c829c48/rpds_py-0.25.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c2cd1a4b0c2b8c5e31ffff50d09f39906fe351389ba143c195566056c13a7ea", size = 390666, upload-time = "2025-05-21T12:43:40.065Z" }, - { url = "https://files.pythonhosted.org/packages/64/48/1be88a820e7494ce0a15c2d390ccb7c52212370badabf128e6a7bb4cb802/rpds_py-0.25.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1de336a4b164c9188cb23f3703adb74a7623ab32d20090d0e9bf499a2203ad65", size = 425109, upload-time = "2025-05-21T12:43:42.263Z" }, - { url = "https://files.pythonhosted.org/packages/cf/07/3e2a17927ef6d7720b9949ec1b37d1e963b829ad0387f7af18d923d5cfa5/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9fca84a15333e925dd59ce01da0ffe2ffe0d6e5d29a9eeba2148916d1824948c", size = 567244, upload-time = "2025-05-21T12:43:43.846Z" }, - { url = "https://files.pythonhosted.org/packages/d2/e5/76cf010998deccc4f95305d827847e2eae9c568099c06b405cf96384762b/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88ec04afe0c59fa64e2f6ea0dd9657e04fc83e38de90f6de201954b4d4eb59bd", size = 596023, upload-time = 
"2025-05-21T12:43:45.932Z" }, - { url = "https://files.pythonhosted.org/packages/52/9a/df55efd84403736ba37a5a6377b70aad0fd1cb469a9109ee8a1e21299a1c/rpds_py-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8bd2f19e312ce3e1d2c635618e8a8d8132892bb746a7cf74780a489f0f6cdcb", size = 561634, upload-time = "2025-05-21T12:43:48.263Z" }, - { url = "https://files.pythonhosted.org/packages/ab/aa/dc3620dd8db84454aaf9374bd318f1aa02578bba5e567f5bf6b79492aca4/rpds_py-0.25.1-cp312-cp312-win32.whl", hash = "sha256:e5e2f7280d8d0d3ef06f3ec1b4fd598d386cc6f0721e54f09109a8132182fbfe", size = 222713, upload-time = "2025-05-21T12:43:49.897Z" }, - { url = "https://files.pythonhosted.org/packages/a3/7f/7cef485269a50ed5b4e9bae145f512d2a111ca638ae70cc101f661b4defd/rpds_py-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:db58483f71c5db67d643857404da360dce3573031586034b7d59f245144cc192", size = 235280, upload-time = "2025-05-21T12:43:51.893Z" }, - { url = "https://files.pythonhosted.org/packages/99/f2/c2d64f6564f32af913bf5f3f7ae41c7c263c5ae4c4e8f1a17af8af66cd46/rpds_py-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:6d50841c425d16faf3206ddbba44c21aa3310a0cebc3c1cdfc3e3f4f9f6f5728", size = 225399, upload-time = "2025-05-21T12:43:53.351Z" }, - { url = "https://files.pythonhosted.org/packages/2b/da/323848a2b62abe6a0fec16ebe199dc6889c5d0a332458da8985b2980dffe/rpds_py-0.25.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:659d87430a8c8c704d52d094f5ba6fa72ef13b4d385b7e542a08fc240cb4a559", size = 364498, upload-time = "2025-05-21T12:43:54.841Z" }, - { url = "https://files.pythonhosted.org/packages/1f/b4/4d3820f731c80fd0cd823b3e95b9963fec681ae45ba35b5281a42382c67d/rpds_py-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:68f6f060f0bbdfb0245267da014d3a6da9be127fe3e8cc4a68c6f833f8a23bb1", size = 350083, upload-time = "2025-05-21T12:43:56.428Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/b1/3a8ee1c9d480e8493619a437dec685d005f706b69253286f50f498cbdbcf/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:083a9513a33e0b92cf6e7a6366036c6bb43ea595332c1ab5c8ae329e4bcc0a9c", size = 389023, upload-time = "2025-05-21T12:43:57.995Z" }, - { url = "https://files.pythonhosted.org/packages/3b/31/17293edcfc934dc62c3bf74a0cb449ecd549531f956b72287203e6880b87/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:816568614ecb22b18a010c7a12559c19f6fe993526af88e95a76d5a60b8b75fb", size = 403283, upload-time = "2025-05-21T12:43:59.546Z" }, - { url = "https://files.pythonhosted.org/packages/d1/ca/e0f0bc1a75a8925024f343258c8ecbd8828f8997ea2ac71e02f67b6f5299/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c6564c0947a7f52e4792983f8e6cf9bac140438ebf81f527a21d944f2fd0a40", size = 524634, upload-time = "2025-05-21T12:44:01.087Z" }, - { url = "https://files.pythonhosted.org/packages/3e/03/5d0be919037178fff33a6672ffc0afa04ea1cfcb61afd4119d1b5280ff0f/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c4a128527fe415d73cf1f70a9a688d06130d5810be69f3b553bf7b45e8acf79", size = 416233, upload-time = "2025-05-21T12:44:02.604Z" }, - { url = "https://files.pythonhosted.org/packages/05/7c/8abb70f9017a231c6c961a8941403ed6557664c0913e1bf413cbdc039e75/rpds_py-0.25.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a49e1d7a4978ed554f095430b89ecc23f42014a50ac385eb0c4d163ce213c325", size = 390375, upload-time = "2025-05-21T12:44:04.162Z" }, - { url = "https://files.pythonhosted.org/packages/7a/ac/a87f339f0e066b9535074a9f403b9313fd3892d4a164d5d5f5875ac9f29f/rpds_py-0.25.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d74ec9bc0e2feb81d3f16946b005748119c0f52a153f6db6a29e8cd68636f295", size = 424537, upload-time = "2025-05-21T12:44:06.175Z" }, - { 
url = "https://files.pythonhosted.org/packages/1f/8f/8d5c1567eaf8c8afe98a838dd24de5013ce6e8f53a01bd47fe8bb06b5533/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:3af5b4cc10fa41e5bc64e5c198a1b2d2864337f8fcbb9a67e747e34002ce812b", size = 566425, upload-time = "2025-05-21T12:44:08.242Z" }, - { url = "https://files.pythonhosted.org/packages/95/33/03016a6be5663b389c8ab0bbbcca68d9e96af14faeff0a04affcb587e776/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:79dc317a5f1c51fd9c6a0c4f48209c6b8526d0524a6904fc1076476e79b00f98", size = 595197, upload-time = "2025-05-21T12:44:10.449Z" }, - { url = "https://files.pythonhosted.org/packages/33/8d/da9f4d3e208c82fda311bff0cf0a19579afceb77cf456e46c559a1c075ba/rpds_py-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1521031351865e0181bc585147624d66b3b00a84109b57fcb7a779c3ec3772cd", size = 561244, upload-time = "2025-05-21T12:44:12.387Z" }, - { url = "https://files.pythonhosted.org/packages/e2/b3/39d5dcf7c5f742ecd6dbc88f6f84ae54184b92f5f387a4053be2107b17f1/rpds_py-0.25.1-cp313-cp313-win32.whl", hash = "sha256:5d473be2b13600b93a5675d78f59e63b51b1ba2d0476893415dfbb5477e65b31", size = 222254, upload-time = "2025-05-21T12:44:14.261Z" }, - { url = "https://files.pythonhosted.org/packages/5f/19/2d6772c8eeb8302c5f834e6d0dfd83935a884e7c5ce16340c7eaf89ce925/rpds_py-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7b74e92a3b212390bdce1d93da9f6488c3878c1d434c5e751cbc202c5e09500", size = 234741, upload-time = "2025-05-21T12:44:16.236Z" }, - { url = "https://files.pythonhosted.org/packages/5b/5a/145ada26cfaf86018d0eb304fe55eafdd4f0b6b84530246bb4a7c4fb5c4b/rpds_py-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:dd326a81afe332ede08eb39ab75b301d5676802cdffd3a8f287a5f0b694dc3f5", size = 224830, upload-time = "2025-05-21T12:44:17.749Z" }, - { url = 
"https://files.pythonhosted.org/packages/4b/ca/d435844829c384fd2c22754ff65889c5c556a675d2ed9eb0e148435c6690/rpds_py-0.25.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:a58d1ed49a94d4183483a3ce0af22f20318d4a1434acee255d683ad90bf78129", size = 359668, upload-time = "2025-05-21T12:44:19.322Z" }, - { url = "https://files.pythonhosted.org/packages/1f/01/b056f21db3a09f89410d493d2f6614d87bb162499f98b649d1dbd2a81988/rpds_py-0.25.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f251bf23deb8332823aef1da169d5d89fa84c89f67bdfb566c49dea1fccfd50d", size = 345649, upload-time = "2025-05-21T12:44:20.962Z" }, - { url = "https://files.pythonhosted.org/packages/e0/0f/e0d00dc991e3d40e03ca36383b44995126c36b3eafa0ccbbd19664709c88/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8dbd586bfa270c1103ece2109314dd423df1fa3d9719928b5d09e4840cec0d72", size = 384776, upload-time = "2025-05-21T12:44:22.516Z" }, - { url = "https://files.pythonhosted.org/packages/9f/a2/59374837f105f2ca79bde3c3cd1065b2f8c01678900924949f6392eab66d/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6d273f136e912aa101a9274c3145dcbddbe4bac560e77e6d5b3c9f6e0ed06d34", size = 395131, upload-time = "2025-05-21T12:44:24.147Z" }, - { url = "https://files.pythonhosted.org/packages/9c/dc/48e8d84887627a0fe0bac53f0b4631e90976fd5d35fff8be66b8e4f3916b/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:666fa7b1bd0a3810a7f18f6d3a25ccd8866291fbbc3c9b912b917a6715874bb9", size = 520942, upload-time = "2025-05-21T12:44:25.915Z" }, - { url = "https://files.pythonhosted.org/packages/7c/f5/ee056966aeae401913d37befeeab57a4a43a4f00099e0a20297f17b8f00c/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:921954d7fbf3fccc7de8f717799304b14b6d9a45bbeec5a8d7408ccbf531faf5", size = 411330, upload-time = "2025-05-21T12:44:27.638Z" }, - { url = 
"https://files.pythonhosted.org/packages/ab/74/b2cffb46a097cefe5d17f94ede7a174184b9d158a0aeb195f39f2c0361e8/rpds_py-0.25.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3d86373ff19ca0441ebeb696ef64cb58b8b5cbacffcda5a0ec2f3911732a194", size = 387339, upload-time = "2025-05-21T12:44:29.292Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9a/0ff0b375dcb5161c2b7054e7d0b7575f1680127505945f5cabaac890bc07/rpds_py-0.25.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c8980cde3bb8575e7c956a530f2c217c1d6aac453474bf3ea0f9c89868b531b6", size = 418077, upload-time = "2025-05-21T12:44:30.877Z" }, - { url = "https://files.pythonhosted.org/packages/0d/a1/fda629bf20d6b698ae84c7c840cfb0e9e4200f664fc96e1f456f00e4ad6e/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8eb8c84ecea987a2523e057c0d950bcb3f789696c0499290b8d7b3107a719d78", size = 562441, upload-time = "2025-05-21T12:44:32.541Z" }, - { url = "https://files.pythonhosted.org/packages/20/15/ce4b5257f654132f326f4acd87268e1006cc071e2c59794c5bdf4bebbb51/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:e43a005671a9ed5a650f3bc39e4dbccd6d4326b24fb5ea8be5f3a43a6f576c72", size = 590750, upload-time = "2025-05-21T12:44:34.557Z" }, - { url = "https://files.pythonhosted.org/packages/fb/ab/e04bf58a8d375aeedb5268edcc835c6a660ebf79d4384d8e0889439448b0/rpds_py-0.25.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:58f77c60956501a4a627749a6dcb78dac522f249dd96b5c9f1c6af29bfacfb66", size = 558891, upload-time = "2025-05-21T12:44:37.358Z" }, - { url = "https://files.pythonhosted.org/packages/90/82/cb8c6028a6ef6cd2b7991e2e4ced01c854b6236ecf51e81b64b569c43d73/rpds_py-0.25.1-cp313-cp313t-win32.whl", hash = "sha256:2cb9e5b5e26fc02c8a4345048cd9998c2aca7c2712bd1b36da0c72ee969a3523", size = 218718, upload-time = "2025-05-21T12:44:38.969Z" }, - { url = 
"https://files.pythonhosted.org/packages/b6/97/5a4b59697111c89477d20ba8a44df9ca16b41e737fa569d5ae8bff99e650/rpds_py-0.25.1-cp313-cp313t-win_amd64.whl", hash = "sha256:401ca1c4a20cc0510d3435d89c069fe0a9ae2ee6495135ac46bdd49ec0495763", size = 232218, upload-time = "2025-05-21T12:44:40.512Z" }, +version = "0.27.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/d9/991a0dee12d9fc53ed027e26a26a64b151d77252ac477e22666b9688bc16/rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f", size = 27420, upload-time = "2025-08-07T08:26:39.624Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/17/e67309ca1ac993fa1888a0d9b2f5ccc1f67196ace32e76c9f8e1dbbbd50c/rpds_py-0.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:19c990fdf5acecbf0623e906ae2e09ce1c58947197f9bced6bbd7482662231c4", size = 362611, upload-time = "2025-08-07T08:23:44.773Z" }, + { url = "https://files.pythonhosted.org/packages/93/2e/28c2fb84aa7aa5d75933d1862d0f7de6198ea22dfd9a0cca06e8a4e7509e/rpds_py-0.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6c27a7054b5224710fcfb1a626ec3ff4f28bcb89b899148c72873b18210e446b", size = 347680, upload-time = "2025-08-07T08:23:46.014Z" }, + { url = "https://files.pythonhosted.org/packages/44/3e/9834b4c8f4f5fe936b479e623832468aa4bd6beb8d014fecaee9eac6cdb1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09965b314091829b378b60607022048953e25f0b396c2b70e7c4c81bcecf932e", size = 384600, upload-time = "2025-08-07T08:23:48Z" }, + { url = "https://files.pythonhosted.org/packages/19/78/744123c7b38865a965cd9e6f691fde7ef989a00a256fa8bf15b75240d12f/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:14f028eb47f59e9169bfdf9f7ceafd29dd64902141840633683d0bad5b04ff34", size = 400697, upload-time = "2025-08-07T08:23:49.407Z" }, + { url = 
"https://files.pythonhosted.org/packages/32/97/3c3d32fe7daee0a1f1a678b6d4dfb8c4dcf88197fa2441f9da7cb54a8466/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6168af0be75bba990a39f9431cdfae5f0ad501f4af32ae62e8856307200517b8", size = 517781, upload-time = "2025-08-07T08:23:50.557Z" }, + { url = "https://files.pythonhosted.org/packages/b2/be/28f0e3e733680aa13ecec1212fc0f585928a206292f14f89c0b8a684cad1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab47fe727c13c09d0e6f508e3a49e545008e23bf762a245b020391b621f5b726", size = 406449, upload-time = "2025-08-07T08:23:51.732Z" }, + { url = "https://files.pythonhosted.org/packages/95/ae/5d15c83e337c082d0367053baeb40bfba683f42459f6ebff63a2fd7e5518/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa01b3d5e3b7d97efab65bd3d88f164e289ec323a8c033c5c38e53ee25c007e", size = 386150, upload-time = "2025-08-07T08:23:52.822Z" }, + { url = "https://files.pythonhosted.org/packages/bf/65/944e95f95d5931112829e040912b25a77b2e7ed913ea5fe5746aa5c1ce75/rpds_py-0.27.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:6c135708e987f46053e0a1246a206f53717f9fadfba27174a9769ad4befba5c3", size = 406100, upload-time = "2025-08-07T08:23:54.339Z" }, + { url = "https://files.pythonhosted.org/packages/21/a4/1664b83fae02894533cd11dc0b9f91d673797c2185b7be0f7496107ed6c5/rpds_py-0.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc327f4497b7087d06204235199daf208fd01c82d80465dc5efa4ec9df1c5b4e", size = 421345, upload-time = "2025-08-07T08:23:55.832Z" }, + { url = "https://files.pythonhosted.org/packages/7c/26/b7303941c2b0823bfb34c71378249f8beedce57301f400acb04bb345d025/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e57906e38583a2cba67046a09c2637e23297618dc1f3caddbc493f2be97c93f", size = 561891, upload-time = "2025-08-07T08:23:56.951Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/c8/48623d64d4a5a028fa99576c768a6159db49ab907230edddc0b8468b998b/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f4f69d7a4300fbf91efb1fb4916421bd57804c01ab938ab50ac9c4aa2212f03", size = 591756, upload-time = "2025-08-07T08:23:58.146Z" }, + { url = "https://files.pythonhosted.org/packages/b3/51/18f62617e8e61cc66334c9fb44b1ad7baae3438662098efbc55fb3fda453/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4c4fbbcff474e1e5f38be1bf04511c03d492d42eec0babda5d03af3b5589374", size = 557088, upload-time = "2025-08-07T08:23:59.6Z" }, + { url = "https://files.pythonhosted.org/packages/bd/4c/e84c3a276e2496a93d245516be6b49e20499aa8ca1c94d59fada0d79addc/rpds_py-0.27.0-cp312-cp312-win32.whl", hash = "sha256:27bac29bbbf39601b2aab474daf99dbc8e7176ca3389237a23944b17f8913d97", size = 221926, upload-time = "2025-08-07T08:24:00.695Z" }, + { url = "https://files.pythonhosted.org/packages/83/89/9d0fbcef64340db0605eb0a0044f258076f3ae0a3b108983b2c614d96212/rpds_py-0.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a06aa1197ec0281eb1d7daf6073e199eb832fe591ffa329b88bae28f25f5fe5", size = 233235, upload-time = "2025-08-07T08:24:01.846Z" }, + { url = "https://files.pythonhosted.org/packages/c9/b0/e177aa9f39cbab060f96de4a09df77d494f0279604dc2f509263e21b05f9/rpds_py-0.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:e14aab02258cb776a108107bd15f5b5e4a1bbaa61ef33b36693dfab6f89d54f9", size = 223315, upload-time = "2025-08-07T08:24:03.337Z" }, + { url = "https://files.pythonhosted.org/packages/81/d2/dfdfd42565a923b9e5a29f93501664f5b984a802967d48d49200ad71be36/rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff", size = 362133, upload-time = "2025-08-07T08:24:04.508Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4a/0a2e2460c4b66021d349ce9f6331df1d6c75d7eea90df9785d333a49df04/rpds_py-0.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash 
= "sha256:b8a7acf04fda1f30f1007f3cc96d29d8cf0a53e626e4e1655fdf4eabc082d367", size = 347128, upload-time = "2025-08-07T08:24:05.695Z" }, + { url = "https://files.pythonhosted.org/packages/35/8d/7d1e4390dfe09d4213b3175a3f5a817514355cb3524593380733204f20b9/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0f92b78cfc3b74a42239fdd8c1266f4715b573204c234d2f9fc3fc7a24f185", size = 384027, upload-time = "2025-08-07T08:24:06.841Z" }, + { url = "https://files.pythonhosted.org/packages/c1/65/78499d1a62172891c8cd45de737b2a4b84a414b6ad8315ab3ac4945a5b61/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ce4ed8e0c7dbc5b19352b9c2c6131dd23b95fa8698b5cdd076307a33626b72dc", size = 399973, upload-time = "2025-08-07T08:24:08.143Z" }, + { url = "https://files.pythonhosted.org/packages/10/a1/1c67c1d8cc889107b19570bb01f75cf49852068e95e6aee80d22915406fc/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fde355b02934cc6b07200cc3b27ab0c15870a757d1a72fd401aa92e2ea3c6bfe", size = 515295, upload-time = "2025-08-07T08:24:09.711Z" }, + { url = "https://files.pythonhosted.org/packages/df/27/700ec88e748436b6c7c4a2262d66e80f8c21ab585d5e98c45e02f13f21c0/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13bbc4846ae4c993f07c93feb21a24d8ec637573d567a924b1001e81c8ae80f9", size = 406737, upload-time = "2025-08-07T08:24:11.182Z" }, + { url = "https://files.pythonhosted.org/packages/33/cc/6b0ee8f0ba3f2df2daac1beda17fde5cf10897a7d466f252bd184ef20162/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be0744661afbc4099fef7f4e604e7f1ea1be1dd7284f357924af12a705cc7d5c", size = 385898, upload-time = "2025-08-07T08:24:12.798Z" }, + { url = "https://files.pythonhosted.org/packages/e8/7e/c927b37d7d33c0a0ebf249cc268dc2fcec52864c1b6309ecb960497f2285/rpds_py-0.27.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = 
"sha256:069e0384a54f427bd65d7fda83b68a90606a3835901aaff42185fcd94f5a9295", size = 405785, upload-time = "2025-08-07T08:24:14.906Z" }, + { url = "https://files.pythonhosted.org/packages/5b/d2/8ed50746d909dcf402af3fa58b83d5a590ed43e07251d6b08fad1a535ba6/rpds_py-0.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4bc262ace5a1a7dc3e2eac2fa97b8257ae795389f688b5adf22c5db1e2431c43", size = 419760, upload-time = "2025-08-07T08:24:16.129Z" }, + { url = "https://files.pythonhosted.org/packages/d3/60/2b2071aee781cb3bd49f94d5d35686990b925e9b9f3e3d149235a6f5d5c1/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2fe6e18e5c8581f0361b35ae575043c7029d0a92cb3429e6e596c2cdde251432", size = 561201, upload-time = "2025-08-07T08:24:17.645Z" }, + { url = "https://files.pythonhosted.org/packages/98/1f/27b67304272521aaea02be293fecedce13fa351a4e41cdb9290576fc6d81/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d93ebdb82363d2e7bec64eecdc3632b59e84bd270d74fe5be1659f7787052f9b", size = 591021, upload-time = "2025-08-07T08:24:18.999Z" }, + { url = "https://files.pythonhosted.org/packages/db/9b/a2fadf823164dd085b1f894be6443b0762a54a7af6f36e98e8fcda69ee50/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0954e3a92e1d62e83a54ea7b3fdc9efa5d61acef8488a8a3d31fdafbfb00460d", size = 556368, upload-time = "2025-08-07T08:24:20.54Z" }, + { url = "https://files.pythonhosted.org/packages/24/f3/6d135d46a129cda2e3e6d4c5e91e2cc26ea0428c6cf152763f3f10b6dd05/rpds_py-0.27.0-cp313-cp313-win32.whl", hash = "sha256:2cff9bdd6c7b906cc562a505c04a57d92e82d37200027e8d362518df427f96cd", size = 221236, upload-time = "2025-08-07T08:24:22.144Z" }, + { url = "https://files.pythonhosted.org/packages/c5/44/65d7494f5448ecc755b545d78b188440f81da98b50ea0447ab5ebfdf9bd6/rpds_py-0.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc79d192fb76fc0c84f2c58672c17bbbc383fd26c3cdc29daae16ce3d927e8b2", size = 232634, upload-time = "2025-08-07T08:24:23.642Z" }, + 
{ url = "https://files.pythonhosted.org/packages/70/d9/23852410fadab2abb611733933401de42a1964ce6600a3badae35fbd573e/rpds_py-0.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b3a5c8089eed498a3af23ce87a80805ff98f6ef8f7bdb70bd1b7dae5105f6ac", size = 222783, upload-time = "2025-08-07T08:24:25.098Z" }, + { url = "https://files.pythonhosted.org/packages/15/75/03447917f78512b34463f4ef11066516067099a0c466545655503bed0c77/rpds_py-0.27.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:90fb790138c1a89a2e58c9282fe1089638401f2f3b8dddd758499041bc6e0774", size = 359154, upload-time = "2025-08-07T08:24:26.249Z" }, + { url = "https://files.pythonhosted.org/packages/6b/fc/4dac4fa756451f2122ddaf136e2c6aeb758dc6fdbe9ccc4bc95c98451d50/rpds_py-0.27.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010c4843a3b92b54373e3d2291a7447d6c3fc29f591772cc2ea0e9f5c1da434b", size = 343909, upload-time = "2025-08-07T08:24:27.405Z" }, + { url = "https://files.pythonhosted.org/packages/7b/81/723c1ed8e6f57ed9d8c0c07578747a2d3d554aaefc1ab89f4e42cfeefa07/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9ce7a9e967afc0a2af7caa0d15a3e9c1054815f73d6a8cb9225b61921b419bd", size = 379340, upload-time = "2025-08-07T08:24:28.714Z" }, + { url = "https://files.pythonhosted.org/packages/98/16/7e3740413de71818ce1997df82ba5f94bae9fff90c0a578c0e24658e6201/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa0bf113d15e8abdfee92aa4db86761b709a09954083afcb5bf0f952d6065fdb", size = 391655, upload-time = "2025-08-07T08:24:30.223Z" }, + { url = "https://files.pythonhosted.org/packages/e0/63/2a9f510e124d80660f60ecce07953f3f2d5f0b96192c1365443859b9c87f/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb91d252b35004a84670dfeafadb042528b19842a0080d8b53e5ec1128e8f433", size = 513017, upload-time = "2025-08-07T08:24:31.446Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/4e/cf6ff311d09776c53ea1b4f2e6700b9d43bb4e99551006817ade4bbd6f78/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db8a6313dbac934193fc17fe7610f70cd8181c542a91382531bef5ed785e5615", size = 402058, upload-time = "2025-08-07T08:24:32.613Z" }, + { url = "https://files.pythonhosted.org/packages/88/11/5e36096d474cb10f2a2d68b22af60a3bc4164fd8db15078769a568d9d3ac/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce96ab0bdfcef1b8c371ada2100767ace6804ea35aacce0aef3aeb4f3f499ca8", size = 383474, upload-time = "2025-08-07T08:24:33.767Z" }, + { url = "https://files.pythonhosted.org/packages/db/a2/3dff02805b06058760b5eaa6d8cb8db3eb3e46c9e452453ad5fc5b5ad9fe/rpds_py-0.27.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:7451ede3560086abe1aa27dcdcf55cd15c96b56f543fb12e5826eee6f721f858", size = 400067, upload-time = "2025-08-07T08:24:35.021Z" }, + { url = "https://files.pythonhosted.org/packages/67/87/eed7369b0b265518e21ea836456a4ed4a6744c8c12422ce05bce760bb3cf/rpds_py-0.27.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:32196b5a99821476537b3f7732432d64d93a58d680a52c5e12a190ee0135d8b5", size = 412085, upload-time = "2025-08-07T08:24:36.267Z" }, + { url = "https://files.pythonhosted.org/packages/8b/48/f50b2ab2fbb422fbb389fe296e70b7a6b5ea31b263ada5c61377e710a924/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a029be818059870664157194e46ce0e995082ac49926f1423c1f058534d2aaa9", size = 555928, upload-time = "2025-08-07T08:24:37.573Z" }, + { url = "https://files.pythonhosted.org/packages/98/41/b18eb51045d06887666c3560cd4bbb6819127b43d758f5adb82b5f56f7d1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3841f66c1ffdc6cebce8aed64e36db71466f1dc23c0d9a5592e2a782a3042c79", size = 585527, upload-time = "2025-08-07T08:24:39.391Z" }, + { url = 
"https://files.pythonhosted.org/packages/be/03/a3dd6470fc76499959b00ae56295b76b4bdf7c6ffc60d62006b1217567e1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:42894616da0fc0dcb2ec08a77896c3f56e9cb2f4b66acd76fc8992c3557ceb1c", size = 554211, upload-time = "2025-08-07T08:24:40.6Z" }, + { url = "https://files.pythonhosted.org/packages/bf/d1/ee5fd1be395a07423ac4ca0bcc05280bf95db2b155d03adefeb47d5ebf7e/rpds_py-0.27.0-cp313-cp313t-win32.whl", hash = "sha256:b1fef1f13c842a39a03409e30ca0bf87b39a1e2a305a9924deadb75a43105d23", size = 216624, upload-time = "2025-08-07T08:24:42.204Z" }, + { url = "https://files.pythonhosted.org/packages/1c/94/4814c4c858833bf46706f87349c37ca45e154da7dbbec9ff09f1abeb08cc/rpds_py-0.27.0-cp313-cp313t-win_amd64.whl", hash = "sha256:183f5e221ba3e283cd36fdfbe311d95cd87699a083330b4f792543987167eff1", size = 230007, upload-time = "2025-08-07T08:24:43.329Z" }, + { url = "https://files.pythonhosted.org/packages/0e/a5/8fffe1c7dc7c055aa02df310f9fb71cfc693a4d5ccc5de2d3456ea5fb022/rpds_py-0.27.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f3cd110e02c5bf17d8fb562f6c9df5c20e73029d587cf8602a2da6c5ef1e32cb", size = 362595, upload-time = "2025-08-07T08:24:44.478Z" }, + { url = "https://files.pythonhosted.org/packages/bc/c7/4e4253fd2d4bb0edbc0b0b10d9f280612ca4f0f990e3c04c599000fe7d71/rpds_py-0.27.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d0e09cf4863c74106b5265c2c310f36146e2b445ff7b3018a56799f28f39f6f", size = 347252, upload-time = "2025-08-07T08:24:45.678Z" }, + { url = "https://files.pythonhosted.org/packages/f3/c8/3d1a954d30f0174dd6baf18b57c215da03cf7846a9d6e0143304e784cddc/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f689ab822f9b5eb6dfc69893b4b9366db1d2420f7db1f6a2adf2a9ca15ad64", size = 384886, upload-time = "2025-08-07T08:24:46.86Z" }, + { url = 
"https://files.pythonhosted.org/packages/e0/52/3c5835f2df389832b28f9276dd5395b5a965cea34226e7c88c8fbec2093c/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e36c80c49853b3ffda7aa1831bf175c13356b210c73128c861f3aa93c3cc4015", size = 399716, upload-time = "2025-08-07T08:24:48.174Z" }, + { url = "https://files.pythonhosted.org/packages/40/73/176e46992461a1749686a2a441e24df51ff86b99c2d34bf39f2a5273b987/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6de6a7f622860af0146cb9ee148682ff4d0cea0b8fd3ad51ce4d40efb2f061d0", size = 517030, upload-time = "2025-08-07T08:24:49.52Z" }, + { url = "https://files.pythonhosted.org/packages/79/2a/7266c75840e8c6e70effeb0d38922a45720904f2cd695e68a0150e5407e2/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4045e2fc4b37ec4b48e8907a5819bdd3380708c139d7cc358f03a3653abedb89", size = 408448, upload-time = "2025-08-07T08:24:50.727Z" }, + { url = "https://files.pythonhosted.org/packages/e6/5f/a7efc572b8e235093dc6cf39f4dbc8a7f08e65fdbcec7ff4daeb3585eef1/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9da162b718b12c4219eeeeb68a5b7552fbc7aadedf2efee440f88b9c0e54b45d", size = 387320, upload-time = "2025-08-07T08:24:52.004Z" }, + { url = "https://files.pythonhosted.org/packages/a2/eb/9ff6bc92efe57cf5a2cb74dee20453ba444b6fdc85275d8c99e0d27239d1/rpds_py-0.27.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:0665be515767dc727ffa5f74bd2ef60b0ff85dad6bb8f50d91eaa6b5fb226f51", size = 407414, upload-time = "2025-08-07T08:24:53.664Z" }, + { url = "https://files.pythonhosted.org/packages/fb/bd/3b9b19b00d5c6e1bd0f418c229ab0f8d3b110ddf7ec5d9d689ef783d0268/rpds_py-0.27.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:203f581accef67300a942e49a37d74c12ceeef4514874c7cede21b012613ca2c", size = 420766, upload-time = "2025-08-07T08:24:55.917Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/6b/521a7b1079ce16258c70805166e3ac6ec4ee2139d023fe07954dc9b2d568/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7873b65686a6471c0037139aa000d23fe94628e0daaa27b6e40607c90e3f5ec4", size = 562409, upload-time = "2025-08-07T08:24:57.17Z" }, + { url = "https://files.pythonhosted.org/packages/8b/bf/65db5bfb14ccc55e39de8419a659d05a2a9cd232f0a699a516bb0991da7b/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:249ab91ceaa6b41abc5f19513cb95b45c6f956f6b89f1fe3d99c81255a849f9e", size = 590793, upload-time = "2025-08-07T08:24:58.388Z" }, + { url = "https://files.pythonhosted.org/packages/db/b8/82d368b378325191ba7aae8f40f009b78057b598d4394d1f2cdabaf67b3f/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2f184336bc1d6abfaaa1262ed42739c3789b1e3a65a29916a615307d22ffd2e", size = 558178, upload-time = "2025-08-07T08:24:59.756Z" }, + { url = "https://files.pythonhosted.org/packages/f6/ff/f270bddbfbc3812500f8131b1ebbd97afd014cd554b604a3f73f03133a36/rpds_py-0.27.0-cp314-cp314-win32.whl", hash = "sha256:d3c622c39f04d5751408f5b801ecb527e6e0a471b367f420a877f7a660d583f6", size = 222355, upload-time = "2025-08-07T08:25:01.027Z" }, + { url = "https://files.pythonhosted.org/packages/bf/20/fdab055b1460c02ed356a0e0b0a78c1dd32dc64e82a544f7b31c9ac643dc/rpds_py-0.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:cf824aceaeffff029ccfba0da637d432ca71ab21f13e7f6f5179cd88ebc77a8a", size = 234007, upload-time = "2025-08-07T08:25:02.268Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a8/694c060005421797a3be4943dab8347c76c2b429a9bef68fb2c87c9e70c7/rpds_py-0.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:86aca1616922b40d8ac1b3073a1ead4255a2f13405e5700c01f7c8d29a03972d", size = 223527, upload-time = "2025-08-07T08:25:03.45Z" }, + { url = "https://files.pythonhosted.org/packages/1e/f9/77f4c90f79d2c5ca8ce6ec6a76cb4734ee247de6b3a4f337e289e1f00372/rpds_py-0.27.0-cp314-cp314t-macosx_10_12_x86_64.whl", 
hash = "sha256:341d8acb6724c0c17bdf714319c393bb27f6d23d39bc74f94221b3e59fc31828", size = 359469, upload-time = "2025-08-07T08:25:04.648Z" }, + { url = "https://files.pythonhosted.org/packages/c0/22/b97878d2f1284286fef4172069e84b0b42b546ea7d053e5fb7adb9ac6494/rpds_py-0.27.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6b96b0b784fe5fd03beffff2b1533dc0d85e92bab8d1b2c24ef3a5dc8fac5669", size = 343960, upload-time = "2025-08-07T08:25:05.863Z" }, + { url = "https://files.pythonhosted.org/packages/b1/b0/dfd55b5bb480eda0578ae94ef256d3061d20b19a0f5e18c482f03e65464f/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c431bfb91478d7cbe368d0a699978050d3b112d7f1d440a41e90faa325557fd", size = 380201, upload-time = "2025-08-07T08:25:07.513Z" }, + { url = "https://files.pythonhosted.org/packages/28/22/e1fa64e50d58ad2b2053077e3ec81a979147c43428de9e6de68ddf6aff4e/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20e222a44ae9f507d0f2678ee3dd0c45ec1e930f6875d99b8459631c24058aec", size = 392111, upload-time = "2025-08-07T08:25:09.149Z" }, + { url = "https://files.pythonhosted.org/packages/49/f9/43ab7a43e97aedf6cea6af70fdcbe18abbbc41d4ae6cdec1bfc23bbad403/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:184f0d7b342967f6cda94a07d0e1fae177d11d0b8f17d73e06e36ac02889f303", size = 515863, upload-time = "2025-08-07T08:25:10.431Z" }, + { url = "https://files.pythonhosted.org/packages/38/9b/9bd59dcc636cd04d86a2d20ad967770bf348f5eb5922a8f29b547c074243/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a00c91104c173c9043bc46f7b30ee5e6d2f6b1149f11f545580f5d6fdff42c0b", size = 402398, upload-time = "2025-08-07T08:25:11.819Z" }, + { url = "https://files.pythonhosted.org/packages/71/bf/f099328c6c85667aba6b66fa5c35a8882db06dcd462ea214be72813a0dd2/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f7a37dd208f0d658e0487522078b1ed68cd6bce20ef4b5a915d2809b9094b410", size = 384665, upload-time = "2025-08-07T08:25:13.194Z" }, + { url = "https://files.pythonhosted.org/packages/a9/c5/9c1f03121ece6634818490bd3c8be2c82a70928a19de03467fb25a3ae2a8/rpds_py-0.27.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:92f3b3ec3e6008a1fe00b7c0946a170f161ac00645cde35e3c9a68c2475e8156", size = 400405, upload-time = "2025-08-07T08:25:14.417Z" }, + { url = "https://files.pythonhosted.org/packages/b5/b8/e25d54af3e63ac94f0c16d8fe143779fe71ff209445a0c00d0f6984b6b2c/rpds_py-0.27.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a1b3db5fae5cbce2131b7420a3f83553d4d89514c03d67804ced36161fe8b6b2", size = 413179, upload-time = "2025-08-07T08:25:15.664Z" }, + { url = "https://files.pythonhosted.org/packages/f9/d1/406b3316433fe49c3021546293a04bc33f1478e3ec7950215a7fce1a1208/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5355527adaa713ab693cbce7c1e0ec71682f599f61b128cf19d07e5c13c9b1f1", size = 556895, upload-time = "2025-08-07T08:25:17.061Z" }, + { url = "https://files.pythonhosted.org/packages/5f/bc/3697c0c21fcb9a54d46ae3b735eb2365eea0c2be076b8f770f98e07998de/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:fcc01c57ce6e70b728af02b2401c5bc853a9e14eb07deda30624374f0aebfe42", size = 585464, upload-time = "2025-08-07T08:25:18.406Z" }, + { url = "https://files.pythonhosted.org/packages/63/09/ee1bb5536f99f42c839b177d552f6114aa3142d82f49cef49261ed28dbe0/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3001013dae10f806380ba739d40dee11db1ecb91684febb8406a87c2ded23dae", size = 555090, upload-time = "2025-08-07T08:25:20.461Z" }, + { url = "https://files.pythonhosted.org/packages/7d/2c/363eada9e89f7059199d3724135a86c47082cbf72790d6ba2f336d146ddb/rpds_py-0.27.0-cp314-cp314t-win32.whl", hash = "sha256:0f401c369186a5743694dd9fc08cba66cf70908757552e1f714bfc5219c655b5", size = 218001, upload-time = 
"2025-08-07T08:25:21.761Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3f/d6c216ed5199c9ef79e2a33955601f454ed1e7420a93b89670133bca5ace/rpds_py-0.27.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8a1dca5507fa1337f75dcd5070218b20bc68cf8844271c923c1b79dfcbc20391", size = 230993, upload-time = "2025-08-07T08:25:23.34Z" }, ] [[package]] @@ -4332,31 +5424,55 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/d8/de873d1c1b020d668d8ec9855d390764cb90cf8f6486c0983da52be8b7b7/ruff-0.9.9-py3-none-win_arm64.whl", hash = "sha256:3ac78f127517209fe6d96ab00f3ba97cafe38718b23b1db3e96d8b2d39e37ddf", size = 10435860, upload-time = "2025-02-28T10:16:39.481Z" }, ] +[[package]] +name = "s3transfer" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/62/74/8d69dcb7a9efe8baa2046891735e5dfe433ad558ae23d9e3c14c633d1d58/s3transfer-0.14.0.tar.gz", hash = "sha256:eff12264e7c8b4985074ccce27a3b38a485bb7f7422cc8046fee9be4983e4125", size = 151547, upload-time = "2025-09-09T19:23:31.089Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/f0/ae7ca09223a81a1d890b2557186ea015f6e0502e9b8cb8e1813f1d8cfa4e/s3transfer-0.14.0-py3-none-any.whl", hash = "sha256:ea3b790c7077558ed1f02a3072fb3cb992bbbd253392f4b6e9e8976941c7d456", size = 85712, upload-time = "2025-09-09T19:23:30.041Z" }, +] + +[[package]] +name = "safehttpx" +version = "0.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/67/4c/19db75e6405692b2a96af8f06d1258f8aa7290bdc35ac966f03e207f6d7f/safehttpx-0.1.6.tar.gz", hash = "sha256:b356bfc82cee3a24c395b94a2dbeabbed60aff1aa5fa3b5fe97c4f2456ebce42", size = 9987, upload-time = "2024-12-02T18:44:10.226Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/4d/c0/1108ad9f01567f66b3154063605b350b69c3c9366732e09e45f9fd0d1deb/safehttpx-0.1.6-py3-none-any.whl", hash = "sha256:407cff0b410b071623087c63dd2080c3b44dc076888d8c5823c00d1e58cb381c", size = 8692, upload-time = "2024-12-02T18:44:08.555Z" }, +] + [[package]] name = "safetensors" -version = "0.5.3" +version = "0.6.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/71/7e/2d5d6ee7b40c0682315367ec7475693d110f512922d582fef1bd4a63adc3/safetensors-0.5.3.tar.gz", hash = "sha256:b6b0d6ecacec39a4fdd99cc19f4576f5219ce858e6fd8dbe7609df0b8dc56965", size = 67210, upload-time = "2025-02-26T09:15:13.155Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/cc/738f3011628920e027a11754d9cae9abec1aed00f7ae860abbf843755233/safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9", size = 197968, upload-time = "2025-08-08T13:13:58.654Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/18/ae/88f6c49dbd0cc4da0e08610019a3c78a7d390879a919411a410a1876d03a/safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073", size = 436917, upload-time = "2025-02-26T09:15:03.702Z" }, - { url = "https://files.pythonhosted.org/packages/b8/3b/11f1b4a2f5d2ab7da34ecc062b0bc301f2be024d110a6466726bec8c055c/safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7", size = 418419, upload-time = "2025-02-26T09:15:01.765Z" }, - { url = "https://files.pythonhosted.org/packages/5d/9a/add3e6fef267658075c5a41573c26d42d80c935cdc992384dfae435feaef/safetensors-0.5.3-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:11bce6164887cd491ca75c2326a113ba934be596e22b28b1742ce27b1d076467", size = 459493, upload-time = "2025-02-26T09:14:51.812Z" }, - { url = 
"https://files.pythonhosted.org/packages/df/5c/bf2cae92222513cc23b3ff85c4a1bb2811a2c3583ac0f8e8d502751de934/safetensors-0.5.3-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4a243be3590bc3301c821da7a18d87224ef35cbd3e5f5727e4e0728b8172411e", size = 472400, upload-time = "2025-02-26T09:14:53.549Z" }, - { url = "https://files.pythonhosted.org/packages/58/11/7456afb740bd45782d0f4c8e8e1bb9e572f1bf82899fb6ace58af47b4282/safetensors-0.5.3-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8bd84b12b1670a6f8e50f01e28156422a2bc07fb16fc4e98bded13039d688a0d", size = 522891, upload-time = "2025-02-26T09:14:55.717Z" }, - { url = "https://files.pythonhosted.org/packages/57/3d/fe73a9d2ace487e7285f6e157afee2383bd1ddb911b7cb44a55cf812eae3/safetensors-0.5.3-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:391ac8cab7c829452175f871fcaf414aa1e292b5448bd02620f675a7f3e7abb9", size = 537694, upload-time = "2025-02-26T09:14:57.036Z" }, - { url = "https://files.pythonhosted.org/packages/a6/f8/dae3421624fcc87a89d42e1898a798bc7ff72c61f38973a65d60df8f124c/safetensors-0.5.3-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cead1fa41fc54b1e61089fa57452e8834f798cb1dc7a09ba3524f1eb08e0317a", size = 471642, upload-time = "2025-02-26T09:15:00.544Z" }, - { url = "https://files.pythonhosted.org/packages/ce/20/1fbe16f9b815f6c5a672f5b760951e20e17e43f67f231428f871909a37f6/safetensors-0.5.3-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1077f3e94182d72618357b04b5ced540ceb71c8a813d3319f1aba448e68a770d", size = 502241, upload-time = "2025-02-26T09:14:58.303Z" }, - { url = "https://files.pythonhosted.org/packages/5f/18/8e108846b506487aa4629fe4116b27db65c3dde922de2c8e0cc1133f3f29/safetensors-0.5.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:799021e78287bac619c7b3f3606730a22da4cda27759ddf55d37c8db7511c74b", size = 638001, upload-time = "2025-02-26T09:15:05.79Z" }, - { url = 
"https://files.pythonhosted.org/packages/82/5a/c116111d8291af6c8c8a8b40628fe833b9db97d8141c2a82359d14d9e078/safetensors-0.5.3-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:df26da01aaac504334644e1b7642fa000bfec820e7cef83aeac4e355e03195ff", size = 734013, upload-time = "2025-02-26T09:15:07.892Z" }, - { url = "https://files.pythonhosted.org/packages/7d/ff/41fcc4d3b7de837963622e8610d998710705bbde9a8a17221d85e5d0baad/safetensors-0.5.3-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:32c3ef2d7af8b9f52ff685ed0bc43913cdcde135089ae322ee576de93eae5135", size = 670687, upload-time = "2025-02-26T09:15:09.979Z" }, - { url = "https://files.pythonhosted.org/packages/40/ad/2b113098e69c985a3d8fbda4b902778eae4a35b7d5188859b4a63d30c161/safetensors-0.5.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:37f1521be045e56fc2b54c606d4455573e717b2d887c579ee1dbba5f868ece04", size = 643147, upload-time = "2025-02-26T09:15:11.185Z" }, - { url = "https://files.pythonhosted.org/packages/0a/0c/95aeb51d4246bd9a3242d3d8349c1112b4ee7611a4b40f0c5c93b05f001d/safetensors-0.5.3-cp38-abi3-win32.whl", hash = "sha256:cfc0ec0846dcf6763b0ed3d1846ff36008c6e7290683b61616c4b040f6a54ace", size = 296677, upload-time = "2025-02-26T09:15:16.554Z" }, - { url = "https://files.pythonhosted.org/packages/69/e2/b011c38e5394c4c18fb5500778a55ec43ad6106126e74723ffaee246f56e/safetensors-0.5.3-cp38-abi3-win_amd64.whl", hash = "sha256:836cbbc320b47e80acd40e44c8682db0e8ad7123209f69b093def21ec7cafd11", size = 308878, upload-time = "2025-02-26T09:15:14.99Z" }, + { url = "https://files.pythonhosted.org/packages/4d/b1/3f5fd73c039fc87dba3ff8b5d528bfc5a32b597fea8e7a6a4800343a17c7/safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba", size = 454797, upload-time = "2025-08-08T13:13:52.066Z" }, + { url = 
"https://files.pythonhosted.org/packages/8c/c9/bb114c158540ee17907ec470d01980957fdaf87b4aa07914c24eba87b9c6/safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b", size = 432206, upload-time = "2025-08-08T13:13:50.931Z" }, + { url = "https://files.pythonhosted.org/packages/d3/8e/f70c34e47df3110e8e0bb268d90db8d4be8958a54ab0336c9be4fe86dac8/safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd", size = 473261, upload-time = "2025-08-08T13:13:41.259Z" }, + { url = "https://files.pythonhosted.org/packages/2a/f5/be9c6a7c7ef773e1996dc214e73485286df1836dbd063e8085ee1976f9cb/safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a", size = 485117, upload-time = "2025-08-08T13:13:43.506Z" }, + { url = "https://files.pythonhosted.org/packages/c9/55/23f2d0a2c96ed8665bf17a30ab4ce5270413f4d74b6d87dd663258b9af31/safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1", size = 616154, upload-time = "2025-08-08T13:13:45.096Z" }, + { url = "https://files.pythonhosted.org/packages/98/c6/affb0bd9ce02aa46e7acddbe087912a04d953d7a4d74b708c91b5806ef3f/safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda", size = 520713, upload-time = "2025-08-08T13:13:46.25Z" }, + { url = "https://files.pythonhosted.org/packages/fe/5d/5a514d7b88e310c8b146e2404e0dc161282e78634d9358975fd56dfd14be/safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f", size = 485835, upload-time = "2025-08-08T13:13:49.373Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/7b/4fc3b2ba62c352b2071bea9cfbad330fadda70579f617506ae1a2f129cab/safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19", size = 521503, upload-time = "2025-08-08T13:13:47.651Z" }, + { url = "https://files.pythonhosted.org/packages/5a/50/0057e11fe1f3cead9254315a6c106a16dd4b1a19cd247f7cc6414f6b7866/safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce", size = 652256, upload-time = "2025-08-08T13:13:53.167Z" }, + { url = "https://files.pythonhosted.org/packages/e9/29/473f789e4ac242593ac1656fbece6e1ecd860bb289e635e963667807afe3/safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7", size = 747281, upload-time = "2025-08-08T13:13:54.656Z" }, + { url = "https://files.pythonhosted.org/packages/68/52/f7324aad7f2df99e05525c84d352dc217e0fa637a4f603e9f2eedfbe2c67/safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5", size = 692286, upload-time = "2025-08-08T13:13:55.884Z" }, + { url = "https://files.pythonhosted.org/packages/ad/fe/cad1d9762868c7c5dc70c8620074df28ebb1a8e4c17d4c0cb031889c457e/safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac", size = 655957, upload-time = "2025-08-08T13:13:57.029Z" }, + { url = "https://files.pythonhosted.org/packages/59/a7/e2158e17bbe57d104f0abbd95dff60dda916cf277c9f9663b4bf9bad8b6e/safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1", size = 308926, upload-time = "2025-08-08T13:14:01.095Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/c3/c0be1135726618dc1e28d181b8c442403d8dbb9e273fd791de2d4384bcdd/safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c", size = 320192, upload-time = "2025-08-08T13:13:59.467Z" }, ] [[package]] name = "scikit-learn" -version = "1.7.0" +version = "1.7.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "joblib" }, @@ -4364,133 +5480,207 @@ dependencies = [ { name = "scipy" }, { name = "threadpoolctl" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/df/3b/29fa87e76b1d7b3b77cc1fcbe82e6e6b8cd704410705b008822de530277c/scikit_learn-1.7.0.tar.gz", hash = "sha256:c01e869b15aec88e2cdb73d27f15bdbe03bce8e2fb43afbe77c45d399e73a5a3", size = 7178217, upload-time = "2025-06-05T22:02:46.703Z" } +sdist = { url = "https://files.pythonhosted.org/packages/41/84/5f4af978fff619706b8961accac84780a6d298d82a8873446f72edb4ead0/scikit_learn-1.7.1.tar.gz", hash = "sha256:24b3f1e976a4665aa74ee0fcaac2b8fccc6ae77c8e07ab25da3ba6d3292b9802", size = 7190445, upload-time = "2025-07-18T08:01:54.5Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/3a/bffab14e974a665a3ee2d79766e7389572ffcaad941a246931c824afcdb2/scikit_learn-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c2c7243d34aaede0efca7a5a96d67fddaebb4ad7e14a70991b9abee9dc5c0379", size = 11646758, upload-time = "2025-06-05T22:02:09.51Z" }, - { url = "https://files.pythonhosted.org/packages/58/d8/f3249232fa79a70cb40595282813e61453c1e76da3e1a44b77a63dd8d0cb/scikit_learn-1.7.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:9f39f6a811bf3f15177b66c82cbe0d7b1ebad9f190737dcdef77cfca1ea3c19c", size = 10673971, upload-time = "2025-06-05T22:02:12.217Z" }, - { url = "https://files.pythonhosted.org/packages/67/93/eb14c50533bea2f77758abe7d60a10057e5f2e2cdcf0a75a14c6bc19c734/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:63017a5f9a74963d24aac7590287149a8d0f1a0799bbe7173c0d8ba1523293c0", size = 11818428, upload-time = "2025-06-05T22:02:14.947Z" }, - { url = "https://files.pythonhosted.org/packages/08/17/804cc13b22a8663564bb0b55fb89e661a577e4e88a61a39740d58b909efe/scikit_learn-1.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b2f8a0b1e73e9a08b7cc498bb2aeab36cdc1f571f8ab2b35c6e5d1c7115d97d", size = 12505887, upload-time = "2025-06-05T22:02:17.824Z" }, - { url = "https://files.pythonhosted.org/packages/68/c7/4e956281a077f4835458c3f9656c666300282d5199039f26d9de1dabd9be/scikit_learn-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:34cc8d9d010d29fb2b7cbcd5ccc24ffdd80515f65fe9f1e4894ace36b267ce19", size = 10668129, upload-time = "2025-06-05T22:02:20.536Z" }, - { url = "https://files.pythonhosted.org/packages/9a/c3/a85dcccdaf1e807e6f067fa95788a6485b0491d9ea44fd4c812050d04f45/scikit_learn-1.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5b7974f1f32bc586c90145df51130e02267e4b7e77cab76165c76cf43faca0d9", size = 11559841, upload-time = "2025-06-05T22:02:23.308Z" }, - { url = "https://files.pythonhosted.org/packages/d8/57/eea0de1562cc52d3196eae51a68c5736a31949a465f0b6bb3579b2d80282/scikit_learn-1.7.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:014e07a23fe02e65f9392898143c542a50b6001dbe89cb867e19688e468d049b", size = 10616463, upload-time = "2025-06-05T22:02:26.068Z" }, - { url = "https://files.pythonhosted.org/packages/10/a4/39717ca669296dfc3a62928393168da88ac9d8cbec88b6321ffa62c6776f/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7e7ced20582d3a5516fb6f405fd1d254e1f5ce712bfef2589f51326af6346e8", size = 11766512, upload-time = "2025-06-05T22:02:28.689Z" }, - { url = "https://files.pythonhosted.org/packages/d5/cd/a19722241d5f7b51e08351e1e82453e0057aeb7621b17805f31fcb57bb6c/scikit_learn-1.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:1babf2511e6ffd695da7a983b4e4d6de45dce39577b26b721610711081850906", size = 12461075, upload-time = "2025-06-05T22:02:31.233Z" }, - { url = "https://files.pythonhosted.org/packages/f3/bc/282514272815c827a9acacbe5b99f4f1a4bc5961053719d319480aee0812/scikit_learn-1.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:5abd2acff939d5bd4701283f009b01496832d50ddafa83c90125a4e41c33e314", size = 10652517, upload-time = "2025-06-05T22:02:34.139Z" }, - { url = "https://files.pythonhosted.org/packages/ea/78/7357d12b2e4c6674175f9a09a3ba10498cde8340e622715bcc71e532981d/scikit_learn-1.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e39d95a929b112047c25b775035c8c234c5ca67e681ce60d12413afb501129f7", size = 12111822, upload-time = "2025-06-05T22:02:36.904Z" }, - { url = "https://files.pythonhosted.org/packages/d0/0c/9c3715393343f04232f9d81fe540eb3831d0b4ec351135a145855295110f/scikit_learn-1.7.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:0521cb460426c56fee7e07f9365b0f45ec8ca7b2d696534ac98bfb85e7ae4775", size = 11325286, upload-time = "2025-06-05T22:02:39.739Z" }, - { url = "https://files.pythonhosted.org/packages/64/e0/42282ad3dd70b7c1a5f65c412ac3841f6543502a8d6263cae7b466612dc9/scikit_learn-1.7.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:317ca9f83acbde2883bd6bb27116a741bfcb371369706b4f9973cf30e9a03b0d", size = 12380865, upload-time = "2025-06-05T22:02:42.137Z" }, - { url = "https://files.pythonhosted.org/packages/4e/d0/3ef4ab2c6be4aa910445cd09c5ef0b44512e3de2cfb2112a88bb647d2cf7/scikit_learn-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:126c09740a6f016e815ab985b21e3a0656835414521c81fc1a8da78b679bdb75", size = 11549609, upload-time = "2025-06-05T22:02:44.483Z" }, + { url = "https://files.pythonhosted.org/packages/cb/16/57f176585b35ed865f51b04117947fe20f130f78940c6477b6d66279c9c2/scikit_learn-1.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3cee419b49b5bbae8796ecd690f97aa412ef1674410c23fc3257c6b8b85b8087", size = 
9260431, upload-time = "2025-07-18T08:01:22.77Z" }, + { url = "https://files.pythonhosted.org/packages/67/4e/899317092f5efcab0e9bc929e3391341cec8fb0e816c4789686770024580/scikit_learn-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2fd8b8d35817b0d9ebf0b576f7d5ffbbabdb55536b0655a8aaae629d7ffd2e1f", size = 8637191, upload-time = "2025-07-18T08:01:24.731Z" }, + { url = "https://files.pythonhosted.org/packages/f3/1b/998312db6d361ded1dd56b457ada371a8d8d77ca2195a7d18fd8a1736f21/scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:588410fa19a96a69763202f1d6b7b91d5d7a5d73be36e189bc6396bfb355bd87", size = 9486346, upload-time = "2025-07-18T08:01:26.713Z" }, + { url = "https://files.pythonhosted.org/packages/ad/09/a2aa0b4e644e5c4ede7006748f24e72863ba2ae71897fecfd832afea01b4/scikit_learn-1.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3142f0abe1ad1d1c31a2ae987621e41f6b578144a911ff4ac94781a583adad7", size = 9290988, upload-time = "2025-07-18T08:01:28.938Z" }, + { url = "https://files.pythonhosted.org/packages/15/fa/c61a787e35f05f17fc10523f567677ec4eeee5f95aa4798dbbbcd9625617/scikit_learn-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3ddd9092c1bd469acab337d87930067c87eac6bd544f8d5027430983f1e1ae88", size = 8735568, upload-time = "2025-07-18T08:01:30.936Z" }, + { url = "https://files.pythonhosted.org/packages/52/f8/e0533303f318a0f37b88300d21f79b6ac067188d4824f1047a37214ab718/scikit_learn-1.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7839687fa46d02e01035ad775982f2470be2668e13ddd151f0f55a5bf123bae", size = 9213143, upload-time = "2025-07-18T08:01:32.942Z" }, + { url = "https://files.pythonhosted.org/packages/71/f3/f1df377d1bdfc3e3e2adc9c119c238b182293e6740df4cbeac6de2cc3e23/scikit_learn-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a10f276639195a96c86aa572ee0698ad64ee939a7b042060b98bd1930c261d10", size = 8591977, upload-time = "2025-07-18T08:01:34.967Z" }, + { url = 
"https://files.pythonhosted.org/packages/99/72/c86a4cd867816350fe8dee13f30222340b9cd6b96173955819a5561810c5/scikit_learn-1.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:13679981fdaebc10cc4c13c43344416a86fcbc61449cb3e6517e1df9d12c8309", size = 9436142, upload-time = "2025-07-18T08:01:37.397Z" }, + { url = "https://files.pythonhosted.org/packages/e8/66/277967b29bd297538dc7a6ecfb1a7dce751beabd0d7f7a2233be7a4f7832/scikit_learn-1.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f1262883c6a63f067a980a8cdd2d2e7f2513dddcef6a9eaada6416a7a7cbe43", size = 9282996, upload-time = "2025-07-18T08:01:39.721Z" }, + { url = "https://files.pythonhosted.org/packages/e2/47/9291cfa1db1dae9880420d1e07dbc7e8dd4a7cdbc42eaba22512e6bde958/scikit_learn-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca6d31fb10e04d50bfd2b50d66744729dbb512d4efd0223b864e2fdbfc4cee11", size = 8707418, upload-time = "2025-07-18T08:01:42.124Z" }, + { url = "https://files.pythonhosted.org/packages/61/95/45726819beccdaa34d3362ea9b2ff9f2b5d3b8bf721bd632675870308ceb/scikit_learn-1.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:781674d096303cfe3d351ae6963ff7c958db61cde3421cd490e3a5a58f2a94ae", size = 9561466, upload-time = "2025-07-18T08:01:44.195Z" }, + { url = "https://files.pythonhosted.org/packages/ee/1c/6f4b3344805de783d20a51eb24d4c9ad4b11a7f75c1801e6ec6d777361fd/scikit_learn-1.7.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:10679f7f125fe7ecd5fad37dd1aa2daae7e3ad8df7f3eefa08901b8254b3e12c", size = 9040467, upload-time = "2025-07-18T08:01:46.671Z" }, + { url = "https://files.pythonhosted.org/packages/6f/80/abe18fe471af9f1d181904203d62697998b27d9b62124cd281d740ded2f9/scikit_learn-1.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1f812729e38c8cb37f760dce71a9b83ccfb04f59b3dca7c6079dcdc60544fa9e", size = 9532052, upload-time = "2025-07-18T08:01:48.676Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/82/b21aa1e0c4cee7e74864d3a5a721ab8fcae5ca55033cb6263dca297ed35b/scikit_learn-1.7.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88e1a20131cf741b84b89567e1717f27a2ced228e0f29103426102bc2e3b8ef7", size = 9361575, upload-time = "2025-07-18T08:01:50.639Z" }, + { url = "https://files.pythonhosted.org/packages/f2/20/f4777fcd5627dc6695fa6b92179d0edb7a3ac1b91bcd9a1c7f64fa7ade23/scikit_learn-1.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b1bd1d919210b6a10b7554b717c9000b5485aa95a1d0f177ae0d7ee8ec750da5", size = 9277310, upload-time = "2025-07-18T08:01:52.547Z" }, ] [[package]] name = "scipy" -version = "1.16.0" +version = "1.16.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/81/18/b06a83f0c5ee8cddbde5e3f3d0bb9b702abfa5136ef6d4620ff67df7eee5/scipy-1.16.0.tar.gz", hash = "sha256:b5ef54021e832869c8cfb03bc3bf20366cbcd426e02a58e8a58d7584dfbb8f62", size = 30581216, upload-time = "2025-06-22T16:27:55.782Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/01/c0/c943bc8d2bbd28123ad0f4f1eef62525fa1723e84d136b32965dcb6bad3a/scipy-1.16.0-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7eb6bd33cef4afb9fa5f1fb25df8feeb1e52d94f21a44f1d17805b41b1da3180", size = 36459071, upload-time = "2025-06-22T16:19:06.605Z" }, - { url = "https://files.pythonhosted.org/packages/99/0d/270e2e9f1a4db6ffbf84c9a0b648499842046e4e0d9b2275d150711b3aba/scipy-1.16.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:1dbc8fdba23e4d80394ddfab7a56808e3e6489176d559c6c71935b11a2d59db1", size = 28490500, upload-time = "2025-06-22T16:19:11.775Z" }, - { url = "https://files.pythonhosted.org/packages/1c/22/01d7ddb07cff937d4326198ec8d10831367a708c3da72dfd9b7ceaf13028/scipy-1.16.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:7dcf42c380e1e3737b343dec21095c9a9ad3f9cbe06f9c05830b44b1786c9e90", size = 20762345, 
upload-time = "2025-06-22T16:19:15.813Z" }, - { url = "https://files.pythonhosted.org/packages/34/7f/87fd69856569ccdd2a5873fe5d7b5bbf2ad9289d7311d6a3605ebde3a94b/scipy-1.16.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:26ec28675f4a9d41587266084c626b02899db373717d9312fa96ab17ca1ae94d", size = 23418563, upload-time = "2025-06-22T16:19:20.746Z" }, - { url = "https://files.pythonhosted.org/packages/f6/f1/e4f4324fef7f54160ab749efbab6a4bf43678a9eb2e9817ed71a0a2fd8de/scipy-1.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:952358b7e58bd3197cfbd2f2f2ba829f258404bdf5db59514b515a8fe7a36c52", size = 33203951, upload-time = "2025-06-22T16:19:25.813Z" }, - { url = "https://files.pythonhosted.org/packages/6d/f0/b6ac354a956384fd8abee2debbb624648125b298f2c4a7b4f0d6248048a5/scipy-1.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:03931b4e870c6fef5b5c0970d52c9f6ddd8c8d3e934a98f09308377eba6f3824", size = 35070225, upload-time = "2025-06-22T16:19:31.416Z" }, - { url = "https://files.pythonhosted.org/packages/e5/73/5cbe4a3fd4bc3e2d67ffad02c88b83edc88f381b73ab982f48f3df1a7790/scipy-1.16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:512c4f4f85912767c351a0306824ccca6fd91307a9f4318efe8fdbd9d30562ef", size = 35389070, upload-time = "2025-06-22T16:19:37.387Z" }, - { url = "https://files.pythonhosted.org/packages/86/e8/a60da80ab9ed68b31ea5a9c6dfd3c2f199347429f229bf7f939a90d96383/scipy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e69f798847e9add03d512eaf5081a9a5c9a98757d12e52e6186ed9681247a1ac", size = 37825287, upload-time = "2025-06-22T16:19:43.375Z" }, - { url = "https://files.pythonhosted.org/packages/ea/b5/29fece1a74c6a94247f8a6fb93f5b28b533338e9c34fdcc9cfe7a939a767/scipy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:adf9b1999323ba335adc5d1dc7add4781cb5a4b0ef1e98b79768c05c796c4e49", size = 38431929, upload-time = "2025-06-22T16:19:49.385Z" }, - { url = 
"https://files.pythonhosted.org/packages/46/95/0746417bc24be0c2a7b7563946d61f670a3b491b76adede420e9d173841f/scipy-1.16.0-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:e9f414cbe9ca289a73e0cc92e33a6a791469b6619c240aa32ee18abdce8ab451", size = 36418162, upload-time = "2025-06-22T16:19:56.3Z" }, - { url = "https://files.pythonhosted.org/packages/19/5a/914355a74481b8e4bbccf67259bbde171348a3f160b67b4945fbc5f5c1e5/scipy-1.16.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:bbba55fb97ba3cdef9b1ee973f06b09d518c0c7c66a009c729c7d1592be1935e", size = 28465985, upload-time = "2025-06-22T16:20:01.238Z" }, - { url = "https://files.pythonhosted.org/packages/58/46/63477fc1246063855969cbefdcee8c648ba4b17f67370bd542ba56368d0b/scipy-1.16.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:58e0d4354eacb6004e7aa1cd350e5514bd0270acaa8d5b36c0627bb3bb486974", size = 20737961, upload-time = "2025-06-22T16:20:05.913Z" }, - { url = "https://files.pythonhosted.org/packages/93/86/0fbb5588b73555e40f9d3d6dde24ee6fac7d8e301a27f6f0cab9d8f66ff2/scipy-1.16.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:75b2094ec975c80efc273567436e16bb794660509c12c6a31eb5c195cbf4b6dc", size = 23377941, upload-time = "2025-06-22T16:20:10.668Z" }, - { url = "https://files.pythonhosted.org/packages/ca/80/a561f2bf4c2da89fa631b3cbf31d120e21ea95db71fd9ec00cb0247c7a93/scipy-1.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6b65d232157a380fdd11a560e7e21cde34fdb69d65c09cb87f6cc024ee376351", size = 33196703, upload-time = "2025-06-22T16:20:16.097Z" }, - { url = "https://files.pythonhosted.org/packages/11/6b/3443abcd0707d52e48eb315e33cc669a95e29fc102229919646f5a501171/scipy-1.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d8747f7736accd39289943f7fe53a8333be7f15a82eea08e4afe47d79568c32", size = 35083410, upload-time = "2025-06-22T16:20:21.734Z" }, - { url = 
"https://files.pythonhosted.org/packages/20/ab/eb0fc00e1e48961f1bd69b7ad7e7266896fe5bad4ead91b5fc6b3561bba4/scipy-1.16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:eb9f147a1b8529bb7fec2a85cf4cf42bdfadf9e83535c309a11fdae598c88e8b", size = 35387829, upload-time = "2025-06-22T16:20:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/57/9e/d6fc64e41fad5d481c029ee5a49eefc17f0b8071d636a02ceee44d4a0de2/scipy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d2b83c37edbfa837a8923d19c749c1935ad3d41cf196006a24ed44dba2ec4358", size = 37841356, upload-time = "2025-06-22T16:20:35.112Z" }, - { url = "https://files.pythonhosted.org/packages/7c/a7/4c94bbe91f12126b8bf6709b2471900577b7373a4fd1f431f28ba6f81115/scipy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:79a3c13d43c95aa80b87328a46031cf52508cf5f4df2767602c984ed1d3c6bbe", size = 38403710, upload-time = "2025-06-22T16:21:54.473Z" }, - { url = "https://files.pythonhosted.org/packages/47/20/965da8497f6226e8fa90ad3447b82ed0e28d942532e92dd8b91b43f100d4/scipy-1.16.0-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:f91b87e1689f0370690e8470916fe1b2308e5b2061317ff76977c8f836452a47", size = 36813833, upload-time = "2025-06-22T16:20:43.925Z" }, - { url = "https://files.pythonhosted.org/packages/28/f4/197580c3dac2d234e948806e164601c2df6f0078ed9f5ad4a62685b7c331/scipy-1.16.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:88a6ca658fb94640079e7a50b2ad3b67e33ef0f40e70bdb7dc22017dae73ac08", size = 28974431, upload-time = "2025-06-22T16:20:51.302Z" }, - { url = "https://files.pythonhosted.org/packages/8a/fc/e18b8550048d9224426e76906694c60028dbdb65d28b1372b5503914b89d/scipy-1.16.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:ae902626972f1bd7e4e86f58fd72322d7f4ec7b0cfc17b15d4b7006efc385176", size = 21246454, upload-time = "2025-06-22T16:20:57.276Z" }, - { url = 
"https://files.pythonhosted.org/packages/8c/48/07b97d167e0d6a324bfd7484cd0c209cc27338b67e5deadae578cf48e809/scipy-1.16.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:8cb824c1fc75ef29893bc32b3ddd7b11cf9ab13c1127fe26413a05953b8c32ed", size = 23772979, upload-time = "2025-06-22T16:21:03.363Z" }, - { url = "https://files.pythonhosted.org/packages/4c/4f/9efbd3f70baf9582edf271db3002b7882c875ddd37dc97f0f675ad68679f/scipy-1.16.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:de2db7250ff6514366a9709c2cba35cb6d08498e961cba20d7cff98a7ee88938", size = 33341972, upload-time = "2025-06-22T16:21:11.14Z" }, - { url = "https://files.pythonhosted.org/packages/3f/dc/9e496a3c5dbe24e76ee24525155ab7f659c20180bab058ef2c5fa7d9119c/scipy-1.16.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e85800274edf4db8dd2e4e93034f92d1b05c9421220e7ded9988b16976f849c1", size = 35185476, upload-time = "2025-06-22T16:21:19.156Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b3/21001cff985a122ba434c33f2c9d7d1dc3b669827e94f4fc4e1fe8b9dfd8/scipy-1.16.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4f720300a3024c237ace1cb11f9a84c38beb19616ba7c4cdcd771047a10a1706", size = 35570990, upload-time = "2025-06-22T16:21:27.797Z" }, - { url = "https://files.pythonhosted.org/packages/e5/d3/7ba42647d6709251cdf97043d0c107e0317e152fa2f76873b656b509ff55/scipy-1.16.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:aad603e9339ddb676409b104c48a027e9916ce0d2838830691f39552b38a352e", size = 37950262, upload-time = "2025-06-22T16:21:36.976Z" }, - { url = "https://files.pythonhosted.org/packages/eb/c4/231cac7a8385394ebbbb4f1ca662203e9d8c332825ab4f36ffc3ead09a42/scipy-1.16.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f56296fefca67ba605fd74d12f7bd23636267731a72cb3947963e76b8c0a25db", size = 38515076, upload-time = "2025-06-22T16:21:45.694Z" }, +sdist = { url = 
"https://files.pythonhosted.org/packages/f5/4a/b927028464795439faec8eaf0b03b011005c487bb2d07409f28bf30879c4/scipy-1.16.1.tar.gz", hash = "sha256:44c76f9e8b6e8e488a586190ab38016e4ed2f8a038af7cd3defa903c0a2238b3", size = 30580861, upload-time = "2025-07-27T16:33:30.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/d9/ec4864f5896232133f51382b54a08de91a9d1af7a76dfa372894026dfee2/scipy-1.16.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:81b433bbeaf35728dad619afc002db9b189e45eebe2cd676effe1fb93fef2b9c", size = 36575194, upload-time = "2025-07-27T16:27:41.321Z" }, + { url = "https://files.pythonhosted.org/packages/5c/6d/40e81ecfb688e9d25d34a847dca361982a6addf8e31f0957b1a54fbfa994/scipy-1.16.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:886cc81fdb4c6903a3bb0464047c25a6d1016fef77bb97949817d0c0d79f9e04", size = 28594590, upload-time = "2025-07-27T16:27:49.204Z" }, + { url = "https://files.pythonhosted.org/packages/0e/37/9f65178edfcc629377ce9a64fc09baebea18c80a9e57ae09a52edf84880b/scipy-1.16.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:15240c3aac087a522b4eaedb09f0ad061753c5eebf1ea430859e5bf8640d5919", size = 20866458, upload-time = "2025-07-27T16:27:54.98Z" }, + { url = "https://files.pythonhosted.org/packages/2c/7b/749a66766871ea4cb1d1ea10f27004db63023074c22abed51f22f09770e0/scipy-1.16.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:65f81a25805f3659b48126b5053d9e823d3215e4a63730b5e1671852a1705921", size = 23539318, upload-time = "2025-07-27T16:28:01.604Z" }, + { url = "https://files.pythonhosted.org/packages/c4/db/8d4afec60eb833a666434d4541a3151eedbf2494ea6d4d468cbe877f00cd/scipy-1.16.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:6c62eea7f607f122069b9bad3f99489ddca1a5173bef8a0c75555d7488b6f725", size = 33292899, upload-time = "2025-07-27T16:28:09.147Z" }, + { url = 
"https://files.pythonhosted.org/packages/51/1e/79023ca3bbb13a015d7d2757ecca3b81293c663694c35d6541b4dca53e98/scipy-1.16.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f965bbf3235b01c776115ab18f092a95aa74c271a52577bcb0563e85738fd618", size = 35162637, upload-time = "2025-07-27T16:28:17.535Z" }, + { url = "https://files.pythonhosted.org/packages/b6/49/0648665f9c29fdaca4c679182eb972935b3b4f5ace41d323c32352f29816/scipy-1.16.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f006e323874ffd0b0b816d8c6a8e7f9a73d55ab3b8c3f72b752b226d0e3ac83d", size = 35490507, upload-time = "2025-07-27T16:28:25.705Z" }, + { url = "https://files.pythonhosted.org/packages/62/8f/66cbb9d6bbb18d8c658f774904f42a92078707a7c71e5347e8bf2f52bb89/scipy-1.16.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8fd15fc5085ab4cca74cb91fe0a4263b1f32e4420761ddae531ad60934c2119", size = 37923998, upload-time = "2025-07-27T16:28:34.339Z" }, + { url = "https://files.pythonhosted.org/packages/14/c3/61f273ae550fbf1667675701112e380881905e28448c080b23b5a181df7c/scipy-1.16.1-cp312-cp312-win_amd64.whl", hash = "sha256:f7b8013c6c066609577d910d1a2a077021727af07b6fab0ee22c2f901f22352a", size = 38508060, upload-time = "2025-07-27T16:28:43.242Z" }, + { url = "https://files.pythonhosted.org/packages/93/0b/b5c99382b839854a71ca9482c684e3472badc62620287cbbdab499b75ce6/scipy-1.16.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5451606823a5e73dfa621a89948096c6528e2896e40b39248295d3a0138d594f", size = 36533717, upload-time = "2025-07-27T16:28:51.706Z" }, + { url = "https://files.pythonhosted.org/packages/eb/e5/69ab2771062c91e23e07c12e7d5033a6b9b80b0903ee709c3c36b3eb520c/scipy-1.16.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:89728678c5ca5abd610aee148c199ac1afb16e19844401ca97d43dc548a354eb", size = 28570009, upload-time = "2025-07-27T16:28:57.017Z" }, + { url = 
"https://files.pythonhosted.org/packages/f4/69/bd75dbfdd3cf524f4d753484d723594aed62cfaac510123e91a6686d520b/scipy-1.16.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e756d688cb03fd07de0fffad475649b03cb89bee696c98ce508b17c11a03f95c", size = 20841942, upload-time = "2025-07-27T16:29:01.152Z" }, + { url = "https://files.pythonhosted.org/packages/ea/74/add181c87663f178ba7d6144b370243a87af8476664d5435e57d599e6874/scipy-1.16.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5aa2687b9935da3ed89c5dbed5234576589dd28d0bf7cd237501ccfbdf1ad608", size = 23498507, upload-time = "2025-07-27T16:29:05.202Z" }, + { url = "https://files.pythonhosted.org/packages/1d/74/ece2e582a0d9550cee33e2e416cc96737dce423a994d12bbe59716f47ff1/scipy-1.16.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0851f6a1e537fe9399f35986897e395a1aa61c574b178c0d456be5b1a0f5ca1f", size = 33286040, upload-time = "2025-07-27T16:29:10.201Z" }, + { url = "https://files.pythonhosted.org/packages/e4/82/08e4076df538fb56caa1d489588d880ec7c52d8273a606bb54d660528f7c/scipy-1.16.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fedc2cbd1baed37474b1924c331b97bdff611d762c196fac1a9b71e67b813b1b", size = 35176096, upload-time = "2025-07-27T16:29:17.091Z" }, + { url = "https://files.pythonhosted.org/packages/fa/79/cd710aab8c921375711a8321c6be696e705a120e3011a643efbbcdeeabcc/scipy-1.16.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2ef500e72f9623a6735769e4b93e9dcb158d40752cdbb077f305487e3e2d1f45", size = 35490328, upload-time = "2025-07-27T16:29:22.928Z" }, + { url = "https://files.pythonhosted.org/packages/71/73/e9cc3d35ee4526d784520d4494a3e1ca969b071fb5ae5910c036a375ceec/scipy-1.16.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:978d8311674b05a8f7ff2ea6c6bce5d8b45a0cb09d4c5793e0318f448613ea65", size = 37939921, upload-time = "2025-07-27T16:29:29.108Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/12/c0efd2941f01940119b5305c375ae5c0fcb7ec193f806bd8f158b73a1782/scipy-1.16.1-cp313-cp313-win_amd64.whl", hash = "sha256:81929ed0fa7a5713fcdd8b2e6f73697d3b4c4816d090dd34ff937c20fa90e8ab", size = 38479462, upload-time = "2025-07-27T16:30:24.078Z" }, + { url = "https://files.pythonhosted.org/packages/7a/19/c3d08b675260046a991040e1ea5d65f91f40c7df1045fffff412dcfc6765/scipy-1.16.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:bcc12db731858abda693cecdb3bdc9e6d4bd200213f49d224fe22df82687bdd6", size = 36938832, upload-time = "2025-07-27T16:29:35.057Z" }, + { url = "https://files.pythonhosted.org/packages/81/f2/ce53db652c033a414a5b34598dba6b95f3d38153a2417c5a3883da429029/scipy-1.16.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:744d977daa4becb9fc59135e75c069f8d301a87d64f88f1e602a9ecf51e77b27", size = 29093084, upload-time = "2025-07-27T16:29:40.201Z" }, + { url = "https://files.pythonhosted.org/packages/a9/ae/7a10ff04a7dc15f9057d05b33737ade244e4bd195caa3f7cc04d77b9e214/scipy-1.16.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:dc54f76ac18073bcecffb98d93f03ed6b81a92ef91b5d3b135dcc81d55a724c7", size = 21365098, upload-time = "2025-07-27T16:29:44.295Z" }, + { url = "https://files.pythonhosted.org/packages/36/ac/029ff710959932ad3c2a98721b20b405f05f752f07344622fd61a47c5197/scipy-1.16.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:367d567ee9fc1e9e2047d31f39d9d6a7a04e0710c86e701e053f237d14a9b4f6", size = 23896858, upload-time = "2025-07-27T16:29:48.784Z" }, + { url = "https://files.pythonhosted.org/packages/71/13/d1ef77b6bd7898720e1f0b6b3743cb945f6c3cafa7718eaac8841035ab60/scipy-1.16.1-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4cf5785e44e19dcd32a0e4807555e1e9a9b8d475c6afff3d21c3c543a6aa84f4", size = 33438311, upload-time = "2025-07-27T16:29:54.164Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/e0/e64a6821ffbb00b4c5b05169f1c1fddb4800e9307efe3db3788995a82a2c/scipy-1.16.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3d0b80fb26d3e13a794c71d4b837e2a589d839fd574a6bbb4ee1288c213ad4a3", size = 35279542, upload-time = "2025-07-27T16:30:00.249Z" }, + { url = "https://files.pythonhosted.org/packages/57/59/0dc3c8b43e118f1e4ee2b798dcc96ac21bb20014e5f1f7a8e85cc0653bdb/scipy-1.16.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8503517c44c18d1030d666cb70aaac1cc8913608816e06742498833b128488b7", size = 35667665, upload-time = "2025-07-27T16:30:05.916Z" }, + { url = "https://files.pythonhosted.org/packages/45/5f/844ee26e34e2f3f9f8febb9343748e72daeaec64fe0c70e9bf1ff84ec955/scipy-1.16.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:30cc4bb81c41831ecfd6dc450baf48ffd80ef5aed0f5cf3ea775740e80f16ecc", size = 38045210, upload-time = "2025-07-27T16:30:11.655Z" }, + { url = "https://files.pythonhosted.org/packages/8d/d7/210f2b45290f444f1de64bc7353aa598ece9f0e90c384b4a156f9b1a5063/scipy-1.16.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c24fa02f7ed23ae514460a22c57eca8f530dbfa50b1cfdbf4f37c05b5309cc39", size = 38593661, upload-time = "2025-07-27T16:30:17.825Z" }, + { url = "https://files.pythonhosted.org/packages/81/ea/84d481a5237ed223bd3d32d6e82d7a6a96e34756492666c260cef16011d1/scipy-1.16.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:796a5a9ad36fa3a782375db8f4241ab02a091308eb079746bc0f874c9b998318", size = 36525921, upload-time = "2025-07-27T16:30:30.081Z" }, + { url = "https://files.pythonhosted.org/packages/4e/9f/d9edbdeff9f3a664807ae3aea383e10afaa247e8e6255e6d2aa4515e8863/scipy-1.16.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:3ea0733a2ff73fd6fdc5fecca54ee9b459f4d74f00b99aced7d9a3adb43fb1cc", size = 28564152, upload-time = "2025-07-27T16:30:35.336Z" }, + { url = 
"https://files.pythonhosted.org/packages/3b/95/8125bcb1fe04bc267d103e76516243e8d5e11229e6b306bda1024a5423d1/scipy-1.16.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:85764fb15a2ad994e708258bb4ed8290d1305c62a4e1ef07c414356a24fcfbf8", size = 20836028, upload-time = "2025-07-27T16:30:39.421Z" }, + { url = "https://files.pythonhosted.org/packages/77/9c/bf92e215701fc70bbcd3d14d86337cf56a9b912a804b9c776a269524a9e9/scipy-1.16.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:ca66d980469cb623b1759bdd6e9fd97d4e33a9fad5b33771ced24d0cb24df67e", size = 23489666, upload-time = "2025-07-27T16:30:43.663Z" }, + { url = "https://files.pythonhosted.org/packages/5e/00/5e941d397d9adac41b02839011594620d54d99488d1be5be755c00cde9ee/scipy-1.16.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e7cc1ffcc230f568549fc56670bcf3df1884c30bd652c5da8138199c8c76dae0", size = 33358318, upload-time = "2025-07-27T16:30:48.982Z" }, + { url = "https://files.pythonhosted.org/packages/0e/87/8db3aa10dde6e3e8e7eb0133f24baa011377d543f5b19c71469cf2648026/scipy-1.16.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3ddfb1e8d0b540cb4ee9c53fc3dea3186f97711248fb94b4142a1b27178d8b4b", size = 35185724, upload-time = "2025-07-27T16:30:54.26Z" }, + { url = "https://files.pythonhosted.org/packages/89/b4/6ab9ae443216807622bcff02690262d8184078ea467efee2f8c93288a3b1/scipy-1.16.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4dc0e7be79e95d8ba3435d193e0d8ce372f47f774cffd882f88ea4e1e1ddc731", size = 35554335, upload-time = "2025-07-27T16:30:59.765Z" }, + { url = "https://files.pythonhosted.org/packages/9c/9a/d0e9dc03c5269a1afb60661118296a32ed5d2c24298af61b676c11e05e56/scipy-1.16.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:f23634f9e5adb51b2a77766dac217063e764337fbc816aa8ad9aaebcd4397fd3", size = 37960310, upload-time = "2025-07-27T16:31:06.151Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/00/c8f3130a50521a7977874817ca89e0599b1b4ee8e938bad8ae798a0e1f0d/scipy-1.16.1-cp314-cp314-win_amd64.whl", hash = "sha256:57d75524cb1c5a374958a2eae3d84e1929bb971204cc9d52213fb8589183fc19", size = 39319239, upload-time = "2025-07-27T16:31:59.942Z" }, + { url = "https://files.pythonhosted.org/packages/f2/f2/1ca3eda54c3a7e4c92f6acef7db7b3a057deb135540d23aa6343ef8ad333/scipy-1.16.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:d8da7c3dd67bcd93f15618938f43ed0995982eb38973023d46d4646c4283ad65", size = 36939460, upload-time = "2025-07-27T16:31:11.865Z" }, + { url = "https://files.pythonhosted.org/packages/80/30/98c2840b293a132400c0940bb9e140171dcb8189588619048f42b2ce7b4f/scipy-1.16.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:cc1d2f2fd48ba1e0620554fe5bc44d3e8f5d4185c8c109c7fbdf5af2792cfad2", size = 29093322, upload-time = "2025-07-27T16:31:17.045Z" }, + { url = "https://files.pythonhosted.org/packages/c1/e6/1e6e006e850622cf2a039b62d1a6ddc4497d4851e58b68008526f04a9a00/scipy-1.16.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:21a611ced9275cb861bacadbada0b8c0623bc00b05b09eb97f23b370fc2ae56d", size = 21365329, upload-time = "2025-07-27T16:31:21.188Z" }, + { url = "https://files.pythonhosted.org/packages/8e/02/72a5aa5b820589dda9a25e329ca752842bfbbaf635e36bc7065a9b42216e/scipy-1.16.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:8dfbb25dffc4c3dd9371d8ab456ca81beeaf6f9e1c2119f179392f0dc1ab7695", size = 23897544, upload-time = "2025-07-27T16:31:25.408Z" }, + { url = "https://files.pythonhosted.org/packages/2b/dc/7122d806a6f9eb8a33532982234bed91f90272e990f414f2830cfe656e0b/scipy-1.16.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:f0ebb7204f063fad87fc0a0e4ff4a2ff40b2a226e4ba1b7e34bf4b79bf97cd86", size = 33442112, upload-time = "2025-07-27T16:31:30.62Z" }, + { url = 
"https://files.pythonhosted.org/packages/24/39/e383af23564daa1021a5b3afbe0d8d6a68ec639b943661841f44ac92de85/scipy-1.16.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f1b9e5962656f2734c2b285a8745358ecb4e4efbadd00208c80a389227ec61ff", size = 35286594, upload-time = "2025-07-27T16:31:36.112Z" }, + { url = "https://files.pythonhosted.org/packages/95/47/1a0b0aff40c3056d955f38b0df5d178350c3d74734ec54f9c68d23910be5/scipy-1.16.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5e1a106f8c023d57a2a903e771228bf5c5b27b5d692088f457acacd3b54511e4", size = 35665080, upload-time = "2025-07-27T16:31:42.025Z" }, + { url = "https://files.pythonhosted.org/packages/64/df/ce88803e9ed6e27fe9b9abefa157cf2c80e4fa527cf17ee14be41f790ad4/scipy-1.16.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:709559a1db68a9abc3b2c8672c4badf1614f3b440b3ab326d86a5c0491eafae3", size = 38050306, upload-time = "2025-07-27T16:31:48.109Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6c/a76329897a7cae4937d403e623aa6aaea616a0bb5b36588f0b9d1c9a3739/scipy-1.16.1-cp314-cp314t-win_amd64.whl", hash = "sha256:c0c804d60492a0aad7f5b2bb1862f4548b990049e27e828391ff2bf6f7199998", size = 39427705, upload-time = "2025-07-27T16:31:53.96Z" }, +] + +[[package]] +name = "semantic-version" +version = "2.10.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7d/31/f2289ce78b9b473d582568c234e104d2a342fd658cc288a7553d83bb8595/semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c", size = 52289, upload-time = "2022-05-26T13:35:23.454Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552, upload-time = "2022-05-26T13:35:21.206Z" }, ] 
[[package]] name = "sentencepiece" -version = "0.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c9/d2/b9c7ca067c26d8ff085d252c89b5f69609ca93fb85a00ede95f4857865d4/sentencepiece-0.2.0.tar.gz", hash = "sha256:a52c19171daaf2e697dc6cbe67684e0fa341b1248966f6aebb541de654d15843", size = 2632106, upload-time = "2024-02-19T17:06:47.428Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/5a/141b227ed54293360a9ffbb7bf8252b4e5efc0400cdeac5809340e5d2b21/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ea5f536e32ea8ec96086ee00d7a4a131ce583a1b18d130711707c10e69601cb2", size = 2409370, upload-time = "2024-02-19T17:06:29.315Z" }, - { url = "https://files.pythonhosted.org/packages/2e/08/a4c135ad6fc2ce26798d14ab72790d66e813efc9589fd30a5316a88ca8d5/sentencepiece-0.2.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0cb51f53b6aae3c36bafe41e86167c71af8370a039f542c43b0cce5ef24a68c", size = 1239288, upload-time = "2024-02-19T17:06:31.674Z" }, - { url = "https://files.pythonhosted.org/packages/49/0a/2fe387f825ac5aad5a0bfe221904882106cac58e1b693ba7818785a882b6/sentencepiece-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3212121805afc58d8b00ab4e7dd1f8f76c203ddb9dc94aa4079618a31cf5da0f", size = 1181597, upload-time = "2024-02-19T17:06:33.763Z" }, - { url = "https://files.pythonhosted.org/packages/cc/38/e4698ee2293fe4835dc033c49796a39b3eebd8752098f6bd0aa53a14af1f/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a3149e3066c2a75e0d68a43eb632d7ae728c7925b517f4c05c40f6f7280ce08", size = 1259220, upload-time = "2024-02-19T17:06:35.85Z" }, - { url = "https://files.pythonhosted.org/packages/12/24/fd7ef967c9dad2f6e6e5386d0cadaf65cda8b7be6e3861a9ab3121035139/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632f3594d3e7ac8b367bca204cb3fd05a01d5b21455acd097ea4c0e30e2f63d7", size = 
1355962, upload-time = "2024-02-19T17:06:38.616Z" }, - { url = "https://files.pythonhosted.org/packages/4f/d2/18246f43ca730bb81918f87b7e886531eda32d835811ad9f4657c54eee35/sentencepiece-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f295105c6bdbb05bd5e1b0cafbd78ff95036f5d3641e7949455a3f4e5e7c3109", size = 1301706, upload-time = "2024-02-19T17:06:40.712Z" }, - { url = "https://files.pythonhosted.org/packages/8a/47/ca237b562f420044ab56ddb4c278672f7e8c866e183730a20e413b38a989/sentencepiece-0.2.0-cp312-cp312-win32.whl", hash = "sha256:fb89f811e5efd18bab141afc3fea3de141c3f69f3fe9e898f710ae7fe3aab251", size = 936941, upload-time = "2024-02-19T17:06:42.802Z" }, - { url = "https://files.pythonhosted.org/packages/c6/97/d159c32642306ee2b70732077632895438867b3b6df282354bd550cf2a67/sentencepiece-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:7a673a72aab81fef5ebe755c6e0cc60087d1f3a4700835d40537183c1703a45f", size = 991994, upload-time = "2024-02-19T17:06:45.01Z" }, +version = "0.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/15/15/2e7a025fc62d764b151ae6d0f2a92f8081755ebe8d4a64099accc6f77ba6/sentencepiece-0.2.1.tar.gz", hash = "sha256:8138cec27c2f2282f4a34d9a016e3374cd40e5c6e9cb335063db66a0a3b71fad", size = 3228515, upload-time = "2025-08-12T07:00:51.718Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/be/32ce495aa1d0e0c323dcb1ba87096037358edee539cac5baf8755a6bd396/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57cae326c8727de58c85977b175af132a7138d84c764635d7e71bbee7e774133", size = 1943152, upload-time = "2025-08-12T06:59:40.048Z" }, + { url = "https://files.pythonhosted.org/packages/88/7e/ff23008899a58678e98c6ff592bf4d368eee5a71af96d0df6b38a039dd4f/sentencepiece-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:56dd39a3c4d6493db3cdca7e8cc68c6b633f0d4195495cbadfcf5af8a22d05a6", size = 1325651, upload-time = 
"2025-08-12T06:59:41.536Z" }, + { url = "https://files.pythonhosted.org/packages/19/84/42eb3ce4796777a1b5d3699dfd4dca85113e68b637f194a6c8d786f16a04/sentencepiece-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d9381351182ff9888cc80e41c632e7e274b106f450de33d67a9e8f6043da6f76", size = 1253645, upload-time = "2025-08-12T06:59:42.903Z" }, + { url = "https://files.pythonhosted.org/packages/89/fa/d3d5ebcba3cb9e6d3775a096251860c41a6bc53a1b9461151df83fe93255/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:99f955df238021bf11f0fc37cdb54fd5e5b5f7fd30ecc3d93fb48b6815437167", size = 1316273, upload-time = "2025-08-12T06:59:44.476Z" }, + { url = "https://files.pythonhosted.org/packages/04/88/14f2f4a2b922d8b39be45bf63d79e6cd3a9b2f248b2fcb98a69b12af12f5/sentencepiece-0.2.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cdfecef430d985f1c2bcbfff3defd1d95dae876fbd0173376012d2d7d24044b", size = 1387881, upload-time = "2025-08-12T06:59:46.09Z" }, + { url = "https://files.pythonhosted.org/packages/fd/b8/903e5ccb77b4ef140605d5d71b4f9e0ad95d456d6184688073ed11712809/sentencepiece-0.2.1-cp312-cp312-win32.whl", hash = "sha256:a483fd29a34c3e34c39ac5556b0a90942bec253d260235729e50976f5dba1068", size = 999540, upload-time = "2025-08-12T06:59:48.023Z" }, + { url = "https://files.pythonhosted.org/packages/2d/81/92df5673c067148c2545b1bfe49adfd775bcc3a169a047f5a0e6575ddaca/sentencepiece-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4cdc7c36234fda305e85c32949c5211faaf8dd886096c7cea289ddc12a2d02de", size = 1054671, upload-time = "2025-08-12T06:59:49.895Z" }, + { url = "https://files.pythonhosted.org/packages/fe/02/c5e3bc518655d714622bec87d83db9cdba1cd0619a4a04e2109751c4f47f/sentencepiece-0.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:daeb5e9e9fcad012324807856113708614d534f596d5008638eb9b40112cd9e4", size = 1033923, upload-time = "2025-08-12T06:59:51.952Z" }, + { url = 
"https://files.pythonhosted.org/packages/ba/4a/85fbe1706d4d04a7e826b53f327c4b80f849cf1c7b7c5e31a20a97d8f28b/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:dcd8161eee7b41aae57ded06272905dbd680a0a04b91edd0f64790c796b2f706", size = 1943150, upload-time = "2025-08-12T06:59:53.588Z" }, + { url = "https://files.pythonhosted.org/packages/c2/83/4cfb393e287509fc2155480b9d184706ef8d9fa8cbf5505d02a5792bf220/sentencepiece-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c6c8f42949f419ff8c7e9960dbadcfbc982d7b5efc2f6748210d3dd53a7de062", size = 1325651, upload-time = "2025-08-12T06:59:55.073Z" }, + { url = "https://files.pythonhosted.org/packages/8d/de/5a007fb53b1ab0aafc69d11a5a3dd72a289d5a3e78dcf2c3a3d9b14ffe93/sentencepiece-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:097f3394e99456e9e4efba1737c3749d7e23563dd1588ce71a3d007f25475fff", size = 1253641, upload-time = "2025-08-12T06:59:56.562Z" }, + { url = "https://files.pythonhosted.org/packages/2c/d2/f552be5928105588f4f4d66ee37dd4c61460d8097e62d0e2e0eec41bc61d/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d7b670879c370d350557edabadbad1f6561a9e6968126e6debca4029e5547820", size = 1316271, upload-time = "2025-08-12T06:59:58.109Z" }, + { url = "https://files.pythonhosted.org/packages/96/df/0cfe748ace5485be740fed9476dee7877f109da32ed0d280312c94ec259f/sentencepiece-0.2.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c7f0fd2f2693309e6628aeeb2e2faf6edd221134dfccac3308ca0de01f8dab47", size = 1387882, upload-time = "2025-08-12T07:00:00.701Z" }, + { url = "https://files.pythonhosted.org/packages/ac/dd/f7774d42a881ced8e1739f393ab1e82ece39fc9abd4779e28050c2e975b5/sentencepiece-0.2.1-cp313-cp313-win32.whl", hash = "sha256:92b3816aa2339355fda2c8c4e021a5de92180b00aaccaf5e2808972e77a4b22f", size = 999541, upload-time = "2025-08-12T07:00:02.709Z" }, + { url = 
"https://files.pythonhosted.org/packages/dd/e9/932b9eae6fd7019548321eee1ab8d5e3b3d1294df9d9a0c9ac517c7b636d/sentencepiece-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:10ed3dab2044c47f7a2e7b4969b0c430420cdd45735d78c8f853191fa0e3148b", size = 1054669, upload-time = "2025-08-12T07:00:04.915Z" }, + { url = "https://files.pythonhosted.org/packages/c9/3a/76488a00ea7d6931689cda28726a1447d66bf1a4837943489314593d5596/sentencepiece-0.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:ac650534e2251083c5f75dde4ff28896ce7c8904133dc8fef42780f4d5588fcd", size = 1033922, upload-time = "2025-08-12T07:00:06.496Z" }, + { url = "https://files.pythonhosted.org/packages/4a/b6/08fe2ce819e02ccb0296f4843e3f195764ce9829cbda61b7513f29b95718/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:8dd4b477a7b069648d19363aad0cab9bad2f4e83b2d179be668efa672500dc94", size = 1946052, upload-time = "2025-08-12T07:00:08.136Z" }, + { url = "https://files.pythonhosted.org/packages/ab/d9/1ea0e740591ff4c6fc2b6eb1d7510d02f3fb885093f19b2f3abd1363b402/sentencepiece-0.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0c0f672da370cc490e4c59d89e12289778310a0e71d176c541e4834759e1ae07", size = 1327408, upload-time = "2025-08-12T07:00:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/99/7e/1fb26e8a21613f6200e1ab88824d5d203714162cf2883248b517deb500b7/sentencepiece-0.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:ad8493bea8432dae8d6830365352350f3b4144415a1d09c4c8cb8d30cf3b6c3c", size = 1254857, upload-time = "2025-08-12T07:00:11.021Z" }, + { url = "https://files.pythonhosted.org/packages/bc/85/c72fd1f3c7a6010544d6ae07f8ddb38b5e2a7e33bd4318f87266c0bbafbf/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b81a24733726e3678d2db63619acc5a8dccd074f7aa7a54ecd5ca33ca6d2d596", size = 1315722, upload-time = "2025-08-12T07:00:12.989Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/e8/661e5bd82a8aa641fd6c1020bd0e890ef73230a2b7215ddf9c8cd8e941c2/sentencepiece-0.2.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0a81799d0a68d618e89063fb423c3001a034c893069135ffe51fee439ae474d6", size = 1387452, upload-time = "2025-08-12T07:00:15.088Z" }, + { url = "https://files.pythonhosted.org/packages/99/5e/ae66c361023a470afcbc1fbb8da722c72ea678a2fcd9a18f1a12598c7501/sentencepiece-0.2.1-cp313-cp313t-win32.whl", hash = "sha256:89a3ea015517c42c0341d0d962f3e6aaf2cf10d71b1932d475c44ba48d00aa2b", size = 1002501, upload-time = "2025-08-12T07:00:16.966Z" }, + { url = "https://files.pythonhosted.org/packages/c1/03/d332828c4ff764e16c1b56c2c8f9a33488bbe796b53fb6b9c4205ddbf167/sentencepiece-0.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:33f068c9382dc2e7c228eedfd8163b52baa86bb92f50d0488bf2b7da7032e484", size = 1057555, upload-time = "2025-08-12T07:00:18.573Z" }, + { url = "https://files.pythonhosted.org/packages/88/14/5aee0bf0864df9bd82bd59e7711362908e4935e3f9cdc1f57246b5d5c9b9/sentencepiece-0.2.1-cp313-cp313t-win_arm64.whl", hash = "sha256:b3616ad246f360e52c85781e47682d31abfb6554c779e42b65333d4b5f44ecc0", size = 1036042, upload-time = "2025-08-12T07:00:20.209Z" }, + { url = "https://files.pythonhosted.org/packages/24/9c/89eb8b2052f720a612478baf11c8227dcf1dc28cd4ea4c0c19506b5af2a2/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:5d0350b686c320068702116276cfb26c066dc7e65cfef173980b11bb4d606719", size = 1943147, upload-time = "2025-08-12T07:00:21.809Z" }, + { url = "https://files.pythonhosted.org/packages/82/0b/a1432bc87f97c2ace36386ca23e8bd3b91fb40581b5e6148d24b24186419/sentencepiece-0.2.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:c7f54a31cde6fa5cb030370566f68152a742f433f8d2be458463d06c208aef33", size = 1325624, upload-time = "2025-08-12T07:00:23.289Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/99/bbe054ebb5a5039457c590e0a4156ed073fb0fe9ce4f7523404dd5b37463/sentencepiece-0.2.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c83b85ab2d6576607f31df77ff86f28182be4a8de6d175d2c33ca609925f5da1", size = 1253670, upload-time = "2025-08-12T07:00:24.69Z" }, + { url = "https://files.pythonhosted.org/packages/19/ad/d5c7075f701bd97971d7c2ac2904f227566f51ef0838dfbdfdccb58cd212/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1855f57db07b51fb51ed6c9c452f570624d2b169b36f0f79ef71a6e6c618cd8b", size = 1316247, upload-time = "2025-08-12T07:00:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/fb/03/35fbe5f3d9a7435eebd0b473e09584bd3cc354ce118b960445b060d33781/sentencepiece-0.2.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01e6912125cb45d3792f530a4d38f8e21bf884d6b4d4ade1b2de5cf7a8d2a52b", size = 1387894, upload-time = "2025-08-12T07:00:28.339Z" }, + { url = "https://files.pythonhosted.org/packages/dc/aa/956ef729aafb6c8f9c443104c9636489093bb5c61d6b90fc27aa1a865574/sentencepiece-0.2.1-cp314-cp314-win32.whl", hash = "sha256:c415c9de1447e0a74ae3fdb2e52f967cb544113a3a5ce3a194df185cbc1f962f", size = 1096698, upload-time = "2025-08-12T07:00:29.764Z" }, + { url = "https://files.pythonhosted.org/packages/b8/cb/fe400d8836952cc535c81a0ce47dc6875160e5fedb71d2d9ff0e9894c2a6/sentencepiece-0.2.1-cp314-cp314-win_amd64.whl", hash = "sha256:881b2e44b14fc19feade3cbed314be37de639fc415375cefaa5bc81a4be137fd", size = 1155115, upload-time = "2025-08-12T07:00:32.865Z" }, + { url = "https://files.pythonhosted.org/packages/32/89/047921cf70f36c7b6b6390876b2399b3633ab73b8d0cb857e5a964238941/sentencepiece-0.2.1-cp314-cp314-win_arm64.whl", hash = "sha256:2005242a16d2dc3ac5fe18aa7667549134d37854823df4c4db244752453b78a8", size = 1133890, upload-time = "2025-08-12T07:00:34.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/a1/11/5b414b9fae6255b5fb1e22e2ed3dc3a72d3a694e5703910e640ac78346bb/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:a19adcec27c524cb7069a1c741060add95f942d1cbf7ad0d104dffa0a7d28a2b", size = 1946081, upload-time = "2025-08-12T07:00:36.97Z" }, + { url = "https://files.pythonhosted.org/packages/77/eb/7a5682bb25824db8545f8e5662e7f3e32d72a508fdce086029d89695106b/sentencepiece-0.2.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:e37e4b4c4a11662b5db521def4e44d4d30ae69a1743241412a93ae40fdcab4bb", size = 1327406, upload-time = "2025-08-12T07:00:38.669Z" }, + { url = "https://files.pythonhosted.org/packages/03/b0/811dae8fb9f2784e138785d481469788f2e0d0c109c5737372454415f55f/sentencepiece-0.2.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:477c81505db072b3ab627e7eab972ea1025331bd3a92bacbf798df2b75ea86ec", size = 1254846, upload-time = "2025-08-12T07:00:40.611Z" }, + { url = "https://files.pythonhosted.org/packages/ef/23/195b2e7ec85ebb6a547969f60b723c7aca5a75800ece6cc3f41da872d14e/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:010f025a544ef770bb395091d57cb94deb9652d8972e0d09f71d85d5a0816c8c", size = 1315721, upload-time = "2025-08-12T07:00:42.914Z" }, + { url = "https://files.pythonhosted.org/packages/7e/aa/553dbe4178b5f23eb28e59393dddd64186178b56b81d9b8d5c3ff1c28395/sentencepiece-0.2.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:733e59ff1794d26db706cd41fc2d7ca5f6c64a820709cb801dc0ea31780d64ab", size = 1387458, upload-time = "2025-08-12T07:00:44.56Z" }, + { url = "https://files.pythonhosted.org/packages/66/7c/08ff0012507297a4dd74a5420fdc0eb9e3e80f4e88cab1538d7f28db303d/sentencepiece-0.2.1-cp314-cp314t-win32.whl", hash = "sha256:d3233770f78e637dc8b1fda2cd7c3b99ec77e7505041934188a4e7fe751de3b0", size = 1099765, upload-time = "2025-08-12T07:00:46.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/d5/2a69e1ce15881beb9ddfc7e3f998322f5cedcd5e4d244cb74dade9441663/sentencepiece-0.2.1-cp314-cp314t-win_amd64.whl", hash = "sha256:5e4366c97b68218fd30ea72d70c525e6e78a6c0a88650f57ac4c43c63b234a9d", size = 1157807, upload-time = "2025-08-12T07:00:47.673Z" }, + { url = "https://files.pythonhosted.org/packages/f3/16/54f611fcfc2d1c46cbe3ec4169780b2cfa7cf63708ef2b71611136db7513/sentencepiece-0.2.1-cp314-cp314t-win_arm64.whl", hash = "sha256:105e36e75cbac1292642045458e8da677b2342dcd33df503e640f0b457cb6751", size = 1136264, upload-time = "2025-08-12T07:00:49.485Z" }, ] [[package]] name = "sentry-sdk" -version = "2.31.0" +version = "2.35.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "certifi" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/d0/45/c7ef7e12d8434fda8b61cdab432d8af64fb832480c93cdaf4bdcab7f5597/sentry_sdk-2.31.0.tar.gz", hash = "sha256:fed6d847f15105849cdf5dfdc64dcec356f936d41abb8c9d66adae45e60959ec", size = 334167, upload-time = "2025-06-24T16:36:26.066Z" } +sdist = { url = "https://files.pythonhosted.org/packages/31/83/055dc157b719651ef13db569bb8cf2103df11174478649735c1b2bf3f6bc/sentry_sdk-2.35.0.tar.gz", hash = "sha256:5ea58d352779ce45d17bc2fa71ec7185205295b83a9dbb5707273deb64720092", size = 343014, upload-time = "2025-08-14T17:11:20.223Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/a2/9b6d8cc59f03251c583b3fec9d2f075dc09c0f6e030e0e0a3b223c6e64b2/sentry_sdk-2.31.0-py2.py3-none-any.whl", hash = "sha256:e953f5ab083e6599bab255b75d6829b33b3ddf9931a27ca00b4ab0081287e84f", size = 355638, upload-time = "2025-06-24T16:36:24.306Z" }, + { url = "https://files.pythonhosted.org/packages/36/3d/742617a7c644deb0c1628dcf6bb2d2165ab7c6aab56fe5222758994007f8/sentry_sdk-2.35.0-py2.py3-none-any.whl", hash = "sha256:6e0c29b9a5d34de8575ffb04d289a987ff3053cf2c98ede445bea995e3830263", size = 363806, upload-time = "2025-08-14T17:11:18.29Z" }, ] 
[[package]] name = "setproctitle" -version = "1.3.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/9e/af/56efe21c53ac81ac87e000b15e60b3d8104224b4313b6eacac3597bd183d/setproctitle-1.3.6.tar.gz", hash = "sha256:c9f32b96c700bb384f33f7cf07954bb609d35dd82752cef57fb2ee0968409169", size = 26889, upload-time = "2025-04-29T13:35:00.184Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/fb/99456fd94d4207c5f6c40746a048a33a52b4239cd7d9c8d4889e2210ec82/setproctitle-1.3.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:af44bb7a1af163806bbb679eb8432fa7b4fb6d83a5d403b541b675dcd3798638", size = 17399, upload-time = "2025-04-29T13:33:13.406Z" }, - { url = "https://files.pythonhosted.org/packages/d5/48/9699191fe6062827683c43bfa9caac33a2c89f8781dd8c7253fa3dba85fd/setproctitle-1.3.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3cca16fd055316a48f0debfcbfb6af7cea715429fc31515ab3fcac05abd527d8", size = 11966, upload-time = "2025-04-29T13:33:14.976Z" }, - { url = "https://files.pythonhosted.org/packages/33/03/b085d192b9ecb9c7ce6ad6ef30ecf4110b7f39430b58a56245569827fcf4/setproctitle-1.3.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea002088d5554fd75e619742cefc78b84a212ba21632e59931b3501f0cfc8f67", size = 32017, upload-time = "2025-04-29T13:33:16.163Z" }, - { url = "https://files.pythonhosted.org/packages/ae/68/c53162e645816f97212002111420d1b2f75bf6d02632e37e961dc2cd6d8b/setproctitle-1.3.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb465dd5825356c1191a038a86ee1b8166e3562d6e8add95eec04ab484cfb8a2", size = 33419, upload-time = "2025-04-29T13:33:18.239Z" }, - { url = "https://files.pythonhosted.org/packages/ac/0d/119a45d15a816a6cf5ccc61b19729f82620095b27a47e0a6838216a95fae/setproctitle-1.3.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d2c8e20487b3b73c1fa72c56f5c89430617296cd380373e7af3a538a82d4cd6d", size = 30711, upload-time = "2025-04-29T13:33:19.571Z" }, - { url = "https://files.pythonhosted.org/packages/e3/fb/5e9b5068df9e9f31a722a775a5e8322a29a638eaaa3eac5ea7f0b35e6314/setproctitle-1.3.6-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d6252098e98129a1decb59b46920d4eca17b0395f3d71b0d327d086fefe77d", size = 31742, upload-time = "2025-04-29T13:33:21.172Z" }, - { url = "https://files.pythonhosted.org/packages/35/88/54de1e73e8fce87d587889c7eedb48fc4ee2bbe4e4ca6331690d03024f86/setproctitle-1.3.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf355fbf0d4275d86f9f57be705d8e5eaa7f8ddb12b24ced2ea6cbd68fdb14dc", size = 31925, upload-time = "2025-04-29T13:33:22.427Z" }, - { url = "https://files.pythonhosted.org/packages/f3/01/65948d7badd66e63e3db247b923143da142790fa293830fdecf832712c2d/setproctitle-1.3.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e288f8a162d663916060beb5e8165a8551312b08efee9cf68302687471a6545d", size = 30981, upload-time = "2025-04-29T13:33:23.739Z" }, - { url = "https://files.pythonhosted.org/packages/22/20/c495e61786f1d38d5dc340b9d9077fee9be3dfc7e89f515afe12e1526dbc/setproctitle-1.3.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b2e54f4a2dc6edf0f5ea5b1d0a608d2af3dcb5aa8c8eeab9c8841b23e1b054fe", size = 33209, upload-time = "2025-04-29T13:33:24.915Z" }, - { url = "https://files.pythonhosted.org/packages/98/3f/a457b8550fbd34d5b482fe20b8376b529e76bf1fbf9a474a6d9a641ab4ad/setproctitle-1.3.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b6f4abde9a2946f57e8daaf1160b2351bcf64274ef539e6675c1d945dbd75e2a", size = 31587, upload-time = "2025-04-29T13:33:26.123Z" }, - { url = "https://files.pythonhosted.org/packages/44/fe/743517340e5a635e3f1c4310baea20c16c66202f96a6f4cead222ffd6d84/setproctitle-1.3.6-cp312-cp312-win32.whl", hash = 
"sha256:db608db98ccc21248370d30044a60843b3f0f3d34781ceeea67067c508cd5a28", size = 11487, upload-time = "2025-04-29T13:33:27.403Z" }, - { url = "https://files.pythonhosted.org/packages/60/9a/d88f1c1f0f4efff1bd29d9233583ee341114dda7d9613941453984849674/setproctitle-1.3.6-cp312-cp312-win_amd64.whl", hash = "sha256:082413db8a96b1f021088e8ec23f0a61fec352e649aba20881895815388b66d3", size = 12208, upload-time = "2025-04-29T13:33:28.852Z" }, - { url = "https://files.pythonhosted.org/packages/89/76/f1a2fdbf9b9602945a7489ba5c52e9863de37381ef1a85a2b9ed0ff8bc79/setproctitle-1.3.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e2a9e62647dc040a76d55563580bf3bb8fe1f5b6ead08447c2ed0d7786e5e794", size = 17392, upload-time = "2025-04-29T13:33:30.925Z" }, - { url = "https://files.pythonhosted.org/packages/5c/5b/4e0db8b10b4543afcb3dbc0827793d46e43ec1de6b377e313af3703d08e0/setproctitle-1.3.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:751ba352ed922e0af60458e961167fa7b732ac31c0ddd1476a2dfd30ab5958c5", size = 11951, upload-time = "2025-04-29T13:33:32.296Z" }, - { url = "https://files.pythonhosted.org/packages/dc/fe/d5d00aaa700fe1f6160b6e95c225b29c01f4d9292176d48fd968815163ea/setproctitle-1.3.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7890e291bf4708e3b61db9069ea39b3ab0651e42923a5e1f4d78a7b9e4b18301", size = 32087, upload-time = "2025-04-29T13:33:33.469Z" }, - { url = "https://files.pythonhosted.org/packages/9f/b3/894b827b93ef813c082479bebf88185860f01ac243df737823dd705e7fff/setproctitle-1.3.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2b17855ed7f994f3f259cf2dfbfad78814538536fa1a91b50253d84d87fd88d", size = 33502, upload-time = "2025-04-29T13:33:34.831Z" }, - { url = "https://files.pythonhosted.org/packages/b2/cd/5330734cca1a4cfcb721432c22cb7899ff15a4101ba868b2ef452ffafea1/setproctitle-1.3.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:2e51ec673513465663008ce402171192a053564865c2fc6dc840620871a9bd7c", size = 30713, upload-time = "2025-04-29T13:33:36.739Z" }, - { url = "https://files.pythonhosted.org/packages/fa/d3/c2590c5daa2e9a008d3f2b16c0f4a351826193be55f147cb32af49c6d814/setproctitle-1.3.6-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63cc10352dc6cf35a33951656aa660d99f25f574eb78132ce41a85001a638aa7", size = 31792, upload-time = "2025-04-29T13:33:37.974Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b1/c553ed5af8cfcecd5ae7737e63af58a17a03d26f3d61868c7eb20bf7e3cf/setproctitle-1.3.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0dba8faee2e4a96e934797c9f0f2d093f8239bf210406a99060b3eabe549628e", size = 31927, upload-time = "2025-04-29T13:33:39.203Z" }, - { url = "https://files.pythonhosted.org/packages/70/78/2d5385206540127a3dca0ff83225b1ac66873f5cc89d4a6d3806c92f5ae2/setproctitle-1.3.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:e3e44d08b61de0dd6f205528498f834a51a5c06689f8fb182fe26f3a3ce7dca9", size = 30981, upload-time = "2025-04-29T13:33:40.431Z" }, - { url = "https://files.pythonhosted.org/packages/31/62/e3e4a4e006d0e549748e53cded4ff3b667be0602860fc61b7de8b412b667/setproctitle-1.3.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:de004939fc3fd0c1200d26ea9264350bfe501ffbf46c8cf5dc7f345f2d87a7f1", size = 33244, upload-time = "2025-04-29T13:33:41.817Z" }, - { url = "https://files.pythonhosted.org/packages/aa/05/4b223fd4ef94e105dc7aff27fa502fb7200cf52be2bb0c064bd2406b5611/setproctitle-1.3.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:3f8194b4d631b003a1176a75d1acd545e04b1f54b821638e098a93e6e62830ef", size = 31630, upload-time = "2025-04-29T13:33:43.093Z" }, - { url = "https://files.pythonhosted.org/packages/1b/ba/5f68eb969f7336f54b54a599fd3ffbd7662f9733b080bc8598705971b3dd/setproctitle-1.3.6-cp313-cp313-win32.whl", hash = 
"sha256:d714e002dd3638170fe7376dc1b686dbac9cb712cde3f7224440af722cc9866a", size = 11480, upload-time = "2025-04-29T13:34:01.257Z" }, - { url = "https://files.pythonhosted.org/packages/ba/f5/7f47f0ca35c9c357f16187cee9229f3eda0237bc6fdd3061441336f361c0/setproctitle-1.3.6-cp313-cp313-win_amd64.whl", hash = "sha256:b70c07409d465f3a8b34d52f863871fb8a00755370791d2bd1d4f82b3cdaf3d5", size = 12198, upload-time = "2025-04-29T13:34:02.293Z" }, - { url = "https://files.pythonhosted.org/packages/39/ad/c3941b8fc6b32a976c9e2d9615a90ae793b69cd010ca8c3575dbc822104f/setproctitle-1.3.6-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:23a57d3b8f1549515c2dbe4a2880ebc1f27780dc126c5e064167563e015817f5", size = 17401, upload-time = "2025-04-29T13:33:44.186Z" }, - { url = "https://files.pythonhosted.org/packages/04/38/a184f857b988d3a9c401e470a4e38182a5c99ee77bf90432d7665e9d35a3/setproctitle-1.3.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:81c443310831e29fabbd07b75ebbfa29d0740b56f5907c6af218482d51260431", size = 11959, upload-time = "2025-04-29T13:33:45.71Z" }, - { url = "https://files.pythonhosted.org/packages/b7/b9/4878ef9d8483adfd1edf6bf95151362aaec0d05aac306a97ff0383f491b5/setproctitle-1.3.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d88c63bd395c787b0aa81d8bbc22c1809f311032ce3e823a6517b711129818e4", size = 33463, upload-time = "2025-04-29T13:33:46.913Z" }, - { url = "https://files.pythonhosted.org/packages/cc/60/3ef49d1931aff2a36a7324a49cca10d77ef03e0278452fd468c33a52d7e3/setproctitle-1.3.6-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d73f14b86d0e2858ece6bf5807c9889670e392c001d414b4293d0d9b291942c3", size = 34959, upload-time = "2025-04-29T13:33:48.216Z" }, - { url = "https://files.pythonhosted.org/packages/81/c6/dee0a973acecefb0db6c9c2e0ea7f18b7e4db773a72e534741ebdee8bbb8/setproctitle-1.3.6-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:3393859eb8f19f5804049a685bf286cb08d447e28ba5c6d8543c7bf5500d5970", size = 32055, upload-time = "2025-04-29T13:33:49.443Z" }, - { url = "https://files.pythonhosted.org/packages/ea/a5/5dd5c4192cf18d16349a32a07f728a9a48a2a05178e16966cabd6645903e/setproctitle-1.3.6-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:785cd210c0311d9be28a70e281a914486d62bfd44ac926fcd70cf0b4d65dff1c", size = 32986, upload-time = "2025-04-29T13:33:51.519Z" }, - { url = "https://files.pythonhosted.org/packages/df/a6/1508d37eb8008670d33f13fcdb91cbd8ef54697276469abbfdd3d4428c59/setproctitle-1.3.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c051f46ed1e13ba8214b334cbf21902102807582fbfaf0fef341b9e52f0fafbf", size = 32736, upload-time = "2025-04-29T13:33:52.852Z" }, - { url = "https://files.pythonhosted.org/packages/1a/73/c84ec8880d543766a12fcd6b65dbd013770974a40577889f357409b0441e/setproctitle-1.3.6-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:49498ebf68ca3e75321ffe634fcea5cc720502bfaa79bd6b03ded92ce0dc3c24", size = 31945, upload-time = "2025-04-29T13:33:54.665Z" }, - { url = "https://files.pythonhosted.org/packages/95/0a/126b9ff7a406a69a62825fe5bd6d1ba8671919a7018c4f9e2c63f49bfcb6/setproctitle-1.3.6-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:4431629c178193f23c538cb1de3da285a99ccc86b20ee91d81eb5f1a80e0d2ba", size = 34333, upload-time = "2025-04-29T13:33:56.101Z" }, - { url = "https://files.pythonhosted.org/packages/9a/fd/5474b04f1c013ff460129d2bc774557dd6e186da4667865efef9a83bf378/setproctitle-1.3.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:d136fbf8ad4321716e44d6d6b3d8dffb4872626010884e07a1db54b7450836cf", size = 32508, upload-time = "2025-04-29T13:33:57.43Z" }, - { url = "https://files.pythonhosted.org/packages/32/21/2503e38520cb076a7ecaef6a35d6a6fa89cf02af3541c84c811fd7500d20/setproctitle-1.3.6-cp313-cp313t-win32.whl", hash = 
"sha256:d483cc23cc56ab32911ea0baa0d2d9ea7aa065987f47de847a0a93a58bf57905", size = 11482, upload-time = "2025-04-29T13:33:58.602Z" }, - { url = "https://files.pythonhosted.org/packages/65/23/7833d75a27fba25ddc5cd3b54cd03c4bf8e18b8e2dbec622eb6326278ce8/setproctitle-1.3.6-cp313-cp313t-win_amd64.whl", hash = "sha256:74973aebea3543ad033b9103db30579ec2b950a466e09f9c2180089e8346e0ec", size = 12209, upload-time = "2025-04-29T13:33:59.727Z" }, +version = "1.3.7" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8d/48/49393a96a2eef1ab418b17475fb92b8fcfad83d099e678751b05472e69de/setproctitle-1.3.7.tar.gz", hash = "sha256:bc2bc917691c1537d5b9bca1468437176809c7e11e5694ca79a9ca12345dcb9e", size = 27002, upload-time = "2025-09-05T12:51:25.278Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/f0/2dc88e842077719d7384d86cc47403e5102810492b33680e7dadcee64cd8/setproctitle-1.3.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2dc99aec591ab6126e636b11035a70991bc1ab7a261da428491a40b84376654e", size = 18049, upload-time = "2025-09-05T12:49:36.241Z" }, + { url = "https://files.pythonhosted.org/packages/f0/b4/50940504466689cda65680c9e9a1e518e5750c10490639fa687489ac7013/setproctitle-1.3.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdd8aa571b7aa39840fdbea620e308a19691ff595c3a10231e9ee830339dd798", size = 13079, upload-time = "2025-09-05T12:49:38.088Z" }, + { url = "https://files.pythonhosted.org/packages/d0/99/71630546b9395b095f4082be41165d1078204d1696c2d9baade3de3202d0/setproctitle-1.3.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:2906b6c7959cdb75f46159bf0acd8cc9906cf1361c9e1ded0d065fe8f9039629", size = 32932, upload-time = "2025-09-05T12:49:39.271Z" }, + { url = 
"https://files.pythonhosted.org/packages/50/22/cee06af4ffcfb0e8aba047bd44f5262e644199ae7527ae2c1f672b86495c/setproctitle-1.3.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6915964a6dda07920a1159321dcd6d94fc7fc526f815ca08a8063aeca3c204f1", size = 33736, upload-time = "2025-09-05T12:49:40.565Z" }, + { url = "https://files.pythonhosted.org/packages/5c/00/a5949a8bb06ef5e7df214fc393bb2fb6aedf0479b17214e57750dfdd0f24/setproctitle-1.3.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:cff72899861c765bd4021d1ff1c68d60edc129711a2fdba77f9cb69ef726a8b6", size = 35605, upload-time = "2025-09-05T12:49:42.362Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3a/50caca532a9343828e3bf5778c7a84d6c737a249b1796d50dd680290594d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b7cb05bd446687ff816a3aaaf831047fc4c364feff7ada94a66024f1367b448c", size = 33143, upload-time = "2025-09-05T12:49:43.515Z" }, + { url = "https://files.pythonhosted.org/packages/ca/14/b843a251296ce55e2e17c017d6b9f11ce0d3d070e9265de4ecad948b913d/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3a57b9a00de8cae7e2a1f7b9f0c2ac7b69372159e16a7708aa2f38f9e5cc987a", size = 34434, upload-time = "2025-09-05T12:49:45.31Z" }, + { url = "https://files.pythonhosted.org/packages/c8/b7/06145c238c0a6d2c4bc881f8be230bb9f36d2bf51aff7bddcb796d5eed67/setproctitle-1.3.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d8828b356114f6b308b04afe398ed93803d7fca4a955dd3abe84430e28d33739", size = 32795, upload-time = "2025-09-05T12:49:46.419Z" }, + { url = "https://files.pythonhosted.org/packages/ef/dc/ef76a81fac9bf27b84ed23df19c1f67391a753eed6e3c2254ebcb5133f56/setproctitle-1.3.7-cp312-cp312-win32.whl", hash = "sha256:b0304f905efc845829ac2bc791ddebb976db2885f6171f4a3de678d7ee3f7c9f", size = 12552, upload-time = "2025-09-05T12:49:47.635Z" }, + { url = 
"https://files.pythonhosted.org/packages/e2/5b/a9fe517912cd6e28cf43a212b80cb679ff179a91b623138a99796d7d18a0/setproctitle-1.3.7-cp312-cp312-win_amd64.whl", hash = "sha256:9888ceb4faea3116cf02a920ff00bfbc8cc899743e4b4ac914b03625bdc3c300", size = 13247, upload-time = "2025-09-05T12:49:49.16Z" }, + { url = "https://files.pythonhosted.org/packages/5d/2f/fcedcade3b307a391b6e17c774c6261a7166aed641aee00ed2aad96c63ce/setproctitle-1.3.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c3736b2a423146b5e62230502e47e08e68282ff3b69bcfe08a322bee73407922", size = 18047, upload-time = "2025-09-05T12:49:50.271Z" }, + { url = "https://files.pythonhosted.org/packages/23/ae/afc141ca9631350d0a80b8f287aac79a76f26b6af28fd8bf92dae70dc2c5/setproctitle-1.3.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3384e682b158d569e85a51cfbde2afd1ab57ecf93ea6651fe198d0ba451196ee", size = 13073, upload-time = "2025-09-05T12:49:51.46Z" }, + { url = "https://files.pythonhosted.org/packages/87/ed/0a4f00315bc02510395b95eec3d4aa77c07192ee79f0baae77ea7b9603d8/setproctitle-1.3.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:0564a936ea687cd24dffcea35903e2a20962aa6ac20e61dd3a207652401492dd", size = 33284, upload-time = "2025-09-05T12:49:52.741Z" }, + { url = "https://files.pythonhosted.org/packages/fc/e4/adf3c4c0a2173cb7920dc9df710bcc67e9bcdbf377e243b7a962dc31a51a/setproctitle-1.3.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a5d1cb3f81531f0eb40e13246b679a1bdb58762b170303463cb06ecc296f26d0", size = 34104, upload-time = "2025-09-05T12:49:54.416Z" }, + { url = "https://files.pythonhosted.org/packages/52/4f/6daf66394152756664257180439d37047aa9a1cfaa5e4f5ed35e93d1dc06/setproctitle-1.3.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a7d159e7345f343b44330cbba9194169b8590cb13dae940da47aa36a72aa9929", size = 35982, upload-time = 
"2025-09-05T12:49:56.295Z" }, + { url = "https://files.pythonhosted.org/packages/1b/62/f2c0595403cf915db031f346b0e3b2c0096050e90e0be658a64f44f4278a/setproctitle-1.3.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0b5074649797fd07c72ca1f6bff0406f4a42e1194faac03ecaab765ce605866f", size = 33150, upload-time = "2025-09-05T12:49:58.025Z" }, + { url = "https://files.pythonhosted.org/packages/a0/29/10dd41cde849fb2f9b626c846b7ea30c99c81a18a5037a45cc4ba33c19a7/setproctitle-1.3.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:61e96febced3f61b766115381d97a21a6265a0f29188a791f6df7ed777aef698", size = 34463, upload-time = "2025-09-05T12:49:59.424Z" }, + { url = "https://files.pythonhosted.org/packages/71/3c/cedd8eccfaf15fb73a2c20525b68c9477518917c9437737fa0fda91e378f/setproctitle-1.3.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:047138279f9463f06b858e579cc79580fbf7a04554d24e6bddf8fe5dddbe3d4c", size = 32848, upload-time = "2025-09-05T12:50:01.107Z" }, + { url = "https://files.pythonhosted.org/packages/d1/3e/0a0e27d1c9926fecccfd1f91796c244416c70bf6bca448d988638faea81d/setproctitle-1.3.7-cp313-cp313-win32.whl", hash = "sha256:7f47accafac7fe6535ba8ba9efd59df9d84a6214565108d0ebb1199119c9cbbd", size = 12544, upload-time = "2025-09-05T12:50:15.81Z" }, + { url = "https://files.pythonhosted.org/packages/36/1b/6bf4cb7acbbd5c846ede1c3f4d6b4ee52744d402e43546826da065ff2ab7/setproctitle-1.3.7-cp313-cp313-win_amd64.whl", hash = "sha256:fe5ca35aeec6dc50cabab9bf2d12fbc9067eede7ff4fe92b8f5b99d92e21263f", size = 13235, upload-time = "2025-09-05T12:50:16.89Z" }, + { url = "https://files.pythonhosted.org/packages/e6/a4/d588d3497d4714750e3eaf269e9e8985449203d82b16b933c39bd3fc52a1/setproctitle-1.3.7-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:10e92915c4b3086b1586933a36faf4f92f903c5554f3c34102d18c7d3f5378e9", size = 18058, upload-time = "2025-09-05T12:50:02.501Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/77/7637f7682322a7244e07c373881c7e982567e2cb1dd2f31bd31481e45500/setproctitle-1.3.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:de879e9c2eab637f34b1a14c4da1e030c12658cdc69ee1b3e5be81b380163ce5", size = 13072, upload-time = "2025-09-05T12:50:03.601Z" }, + { url = "https://files.pythonhosted.org/packages/52/09/f366eca0973cfbac1470068d1313fa3fe3de4a594683385204ec7f1c4101/setproctitle-1.3.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c18246d88e227a5b16248687514f95642505000442165f4b7db354d39d0e4c29", size = 34490, upload-time = "2025-09-05T12:50:04.948Z" }, + { url = "https://files.pythonhosted.org/packages/71/36/611fc2ed149fdea17c3677e1d0df30d8186eef9562acc248682b91312706/setproctitle-1.3.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7081f193dab22df2c36f9fc6d113f3793f83c27891af8fe30c64d89d9a37e152", size = 35267, upload-time = "2025-09-05T12:50:06.015Z" }, + { url = "https://files.pythonhosted.org/packages/88/a4/64e77d0671446bd5a5554387b69e1efd915274686844bea733714c828813/setproctitle-1.3.7-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:9cc9b901ce129350637426a89cfd650066a4adc6899e47822e2478a74023ff7c", size = 37376, upload-time = "2025-09-05T12:50:07.484Z" }, + { url = "https://files.pythonhosted.org/packages/89/bc/ad9c664fe524fb4a4b2d3663661a5c63453ce851736171e454fa2cdec35c/setproctitle-1.3.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:80e177eff2d1ec172188d0d7fd9694f8e43d3aab76a6f5f929bee7bf7894e98b", size = 33963, upload-time = "2025-09-05T12:50:09.056Z" }, + { url = "https://files.pythonhosted.org/packages/ab/01/a36de7caf2d90c4c28678da1466b47495cbbad43badb4e982d8db8167ed4/setproctitle-1.3.7-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:23e520776c445478a67ee71b2a3c1ffdafbe1f9f677239e03d7e2cc635954e18", size = 35550, upload-time = 
"2025-09-05T12:50:10.791Z" }, + { url = "https://files.pythonhosted.org/packages/dd/68/17e8aea0ed5ebc17fbf03ed2562bfab277c280e3625850c38d92a7b5fcd9/setproctitle-1.3.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5fa1953126a3b9bd47049d58c51b9dac72e78ed120459bd3aceb1bacee72357c", size = 33727, upload-time = "2025-09-05T12:50:12.032Z" }, + { url = "https://files.pythonhosted.org/packages/b2/33/90a3bf43fe3a2242b4618aa799c672270250b5780667898f30663fd94993/setproctitle-1.3.7-cp313-cp313t-win32.whl", hash = "sha256:4a5e212bf438a4dbeece763f4962ad472c6008ff6702e230b4f16a037e2f6f29", size = 12549, upload-time = "2025-09-05T12:50:13.074Z" }, + { url = "https://files.pythonhosted.org/packages/0b/0e/50d1f07f3032e1f23d814ad6462bc0a138f369967c72494286b8a5228e40/setproctitle-1.3.7-cp313-cp313t-win_amd64.whl", hash = "sha256:cf2727b733e90b4f874bac53e3092aa0413fe1ea6d4f153f01207e6ce65034d9", size = 13243, upload-time = "2025-09-05T12:50:14.146Z" }, + { url = "https://files.pythonhosted.org/packages/89/c7/43ac3a98414f91d1b86a276bc2f799ad0b4b010e08497a95750d5bc42803/setproctitle-1.3.7-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:80c36c6a87ff72eabf621d0c79b66f3bdd0ecc79e873c1e9f0651ee8bf215c63", size = 18052, upload-time = "2025-09-05T12:50:17.928Z" }, + { url = "https://files.pythonhosted.org/packages/cd/2c/dc258600a25e1a1f04948073826bebc55e18dbd99dc65a576277a82146fa/setproctitle-1.3.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b53602371a52b91c80aaf578b5ada29d311d12b8a69c0c17fbc35b76a1fd4f2e", size = 13071, upload-time = "2025-09-05T12:50:19.061Z" }, + { url = "https://files.pythonhosted.org/packages/ab/26/8e3bb082992f19823d831f3d62a89409deb6092e72fc6940962983ffc94f/setproctitle-1.3.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:fcb966a6c57cf07cc9448321a08f3be6b11b7635be502669bc1d8745115d7e7f", size = 33180, upload-time = "2025-09-05T12:50:20.395Z" }, + { url = 
"https://files.pythonhosted.org/packages/f1/af/ae692a20276d1159dd0cf77b0bcf92cbb954b965655eb4a69672099bb214/setproctitle-1.3.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46178672599b940368d769474fe13ecef1b587d58bb438ea72b9987f74c56ea5", size = 34043, upload-time = "2025-09-05T12:50:22.454Z" }, + { url = "https://files.pythonhosted.org/packages/34/b2/6a092076324dd4dac1a6d38482bedebbff5cf34ef29f58585ec76e47bc9d/setproctitle-1.3.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7f9e9e3ff135cbcc3edd2f4cf29b139f4aca040d931573102742db70ff428c17", size = 35892, upload-time = "2025-09-05T12:50:23.937Z" }, + { url = "https://files.pythonhosted.org/packages/1c/1a/8836b9f28cee32859ac36c3df85aa03e1ff4598d23ea17ca2e96b5845a8f/setproctitle-1.3.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:14c7eba8d90c93b0e79c01f0bd92a37b61983c27d6d7d5a3b5defd599113d60e", size = 32898, upload-time = "2025-09-05T12:50:25.617Z" }, + { url = "https://files.pythonhosted.org/packages/ef/22/8fabdc24baf42defb599714799d8445fe3ae987ec425a26ec8e80ea38f8e/setproctitle-1.3.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:9e64e98077fb30b6cf98073d6c439cd91deb8ebbf8fc62d9dbf52bd38b0c6ac0", size = 34308, upload-time = "2025-09-05T12:50:26.827Z" }, + { url = "https://files.pythonhosted.org/packages/15/1b/b9bee9de6c8cdcb3b3a6cb0b3e773afdb86bbbc1665a3bfa424a4294fda2/setproctitle-1.3.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:b91387cc0f02a00ac95dcd93f066242d3cca10ff9e6153de7ee07069c6f0f7c8", size = 32536, upload-time = "2025-09-05T12:50:28.5Z" }, + { url = "https://files.pythonhosted.org/packages/37/0c/75e5f2685a5e3eda0b39a8b158d6d8895d6daf3ba86dec9e3ba021510272/setproctitle-1.3.7-cp314-cp314-win32.whl", hash = "sha256:52b054a61c99d1b72fba58b7f5486e04b20fefc6961cd76722b424c187f362ed", size = 12731, upload-time = "2025-09-05T12:50:43.955Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/ae/acddbce90d1361e1786e1fb421bc25baeb0c22ef244ee5d0176511769ec8/setproctitle-1.3.7-cp314-cp314-win_amd64.whl", hash = "sha256:5818e4080ac04da1851b3ec71e8a0f64e3748bf9849045180566d8b736702416", size = 13464, upload-time = "2025-09-05T12:50:45.057Z" }, + { url = "https://files.pythonhosted.org/packages/01/6d/20886c8ff2e6d85e3cabadab6aab9bb90acaf1a5cfcb04d633f8d61b2626/setproctitle-1.3.7-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:6fc87caf9e323ac426910306c3e5d3205cd9f8dcac06d233fcafe9337f0928a3", size = 18062, upload-time = "2025-09-05T12:50:29.78Z" }, + { url = "https://files.pythonhosted.org/packages/9a/60/26dfc5f198715f1343b95c2f7a1c16ae9ffa45bd89ffd45a60ed258d24ea/setproctitle-1.3.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6134c63853d87a4897ba7d5cc0e16abfa687f6c66fc09f262bb70d67718f2309", size = 13075, upload-time = "2025-09-05T12:50:31.604Z" }, + { url = "https://files.pythonhosted.org/packages/21/9c/980b01f50d51345dd513047e3ba9e96468134b9181319093e61db1c47188/setproctitle-1.3.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1403d2abfd32790b6369916e2313dffbe87d6b11dca5bbd898981bcde48e7a2b", size = 34744, upload-time = "2025-09-05T12:50:32.777Z" }, + { url = "https://files.pythonhosted.org/packages/86/b4/82cd0c86e6d1c4538e1a7eb908c7517721513b801dff4ba3f98ef816a240/setproctitle-1.3.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e7c5bfe4228ea22373e3025965d1a4116097e555ee3436044f5c954a5e63ac45", size = 35589, upload-time = "2025-09-05T12:50:34.13Z" }, + { url = "https://files.pythonhosted.org/packages/8a/4f/9f6b2a7417fd45673037554021c888b31247f7594ff4bd2239918c5cd6d0/setproctitle-1.3.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:585edf25e54e21a94ccb0fe81ad32b9196b69ebc4fc25f81da81fb8a50cca9e4", size = 37698, upload-time = 
"2025-09-05T12:50:35.524Z" }, + { url = "https://files.pythonhosted.org/packages/20/92/927b7d4744aac214d149c892cb5fa6dc6f49cfa040cb2b0a844acd63dcaf/setproctitle-1.3.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:96c38cdeef9036eb2724c2210e8d0b93224e709af68c435d46a4733a3675fee1", size = 34201, upload-time = "2025-09-05T12:50:36.697Z" }, + { url = "https://files.pythonhosted.org/packages/0a/0c/fd4901db5ba4b9d9013e62f61d9c18d52290497f956745cd3e91b0d80f90/setproctitle-1.3.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:45e3ef48350abb49cf937d0a8ba15e42cee1e5ae13ca41a77c66d1abc27a5070", size = 35801, upload-time = "2025-09-05T12:50:38.314Z" }, + { url = "https://files.pythonhosted.org/packages/e7/e3/54b496ac724e60e61cc3447f02690105901ca6d90da0377dffe49ff99fc7/setproctitle-1.3.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:1fae595d032b30dab4d659bece20debd202229fce12b55abab978b7f30783d73", size = 33958, upload-time = "2025-09-05T12:50:39.841Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a8/c84bb045ebf8c6fdc7f7532319e86f8380d14bbd3084e6348df56bdfe6fd/setproctitle-1.3.7-cp314-cp314t-win32.whl", hash = "sha256:02432f26f5d1329ab22279ff863c83589894977063f59e6c4b4845804a08f8c2", size = 12745, upload-time = "2025-09-05T12:50:41.377Z" }, + { url = "https://files.pythonhosted.org/packages/08/b6/3a5a4f9952972791a9114ac01dfc123f0df79903577a3e0a7a404a695586/setproctitle-1.3.7-cp314-cp314t-win_amd64.whl", hash = "sha256:cbc388e3d86da1f766d8fc2e12682e446064c01cea9f88a88647cfe7c011de6a", size = 13469, upload-time = "2025-09-05T12:50:42.67Z" }, ] [[package]] @@ -4511,6 +5701,41 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, ] +[[package]] +name = "simplejson" +version = "3.20.1" +source = 
{ registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/92/51b417685abd96b31308b61b9acce7ec50d8e1de8fbc39a7fd4962c60689/simplejson-3.20.1.tar.gz", hash = "sha256:e64139b4ec4f1f24c142ff7dcafe55a22b811a74d86d66560c8815687143037d", size = 85591, upload-time = "2025-02-15T05:18:53.15Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/eb/34c16a1ac9ba265d024dc977ad84e1659d931c0a700967c3e59a98ed7514/simplejson-3.20.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f31c4a3a7ab18467ee73a27f3e59158255d1520f3aad74315edde7a940f1be23", size = 93100, upload-time = "2025-02-15T05:16:38.801Z" }, + { url = "https://files.pythonhosted.org/packages/41/fc/2c2c007d135894971e6814e7c0806936e5bade28f8db4dd7e2a58b50debd/simplejson-3.20.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:884e6183d16b725e113b83a6fc0230152ab6627d4d36cb05c89c2c5bccfa7bc6", size = 75464, upload-time = "2025-02-15T05:16:40.905Z" }, + { url = "https://files.pythonhosted.org/packages/0f/05/2b5ecb33b776c34bb5cace5de5d7669f9b60e3ca13c113037b2ca86edfbd/simplejson-3.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03d7a426e416fe0d3337115f04164cd9427eb4256e843a6b8751cacf70abc832", size = 75112, upload-time = "2025-02-15T05:16:42.246Z" }, + { url = "https://files.pythonhosted.org/packages/fe/36/1f3609a2792f06cd4b71030485f78e91eb09cfd57bebf3116bf2980a8bac/simplejson-3.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:000602141d0bddfcff60ea6a6e97d5e10c9db6b17fd2d6c66199fa481b6214bb", size = 150182, upload-time = "2025-02-15T05:16:43.557Z" }, + { url = "https://files.pythonhosted.org/packages/2f/b0/053fbda38b8b602a77a4f7829def1b4f316cd8deb5440a6d3ee90790d2a4/simplejson-3.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af8377a8af78226e82e3a4349efdde59ffa421ae88be67e18cef915e4023a595", size = 158363, upload-time = "2025-02-15T05:16:45.748Z" }, + { url = 
"https://files.pythonhosted.org/packages/d1/4b/2eb84ae867539a80822e92f9be4a7200dffba609275faf99b24141839110/simplejson-3.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c7de4c88ab2fbcb8781a3b982ef883696736134e20b1210bca43fb42ff1acf", size = 148415, upload-time = "2025-02-15T05:16:47.861Z" }, + { url = "https://files.pythonhosted.org/packages/e0/bd/400b0bd372a5666addf2540c7358bfc3841b9ce5cdbc5cc4ad2f61627ad8/simplejson-3.20.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:455a882ff3f97d810709f7b620007d4e0aca8da71d06fc5c18ba11daf1c4df49", size = 152213, upload-time = "2025-02-15T05:16:49.25Z" }, + { url = "https://files.pythonhosted.org/packages/50/12/143f447bf6a827ee9472693768dc1a5eb96154f8feb140a88ce6973a3cfa/simplejson-3.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc0f523ce923e7f38eb67804bc80e0a028c76d7868500aa3f59225574b5d0453", size = 150048, upload-time = "2025-02-15T05:16:51.5Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ea/dd9b3e8e8ed710a66f24a22c16a907c9b539b6f5f45fd8586bd5c231444e/simplejson-3.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:76461ec929282dde4a08061071a47281ad939d0202dc4e63cdd135844e162fbc", size = 151668, upload-time = "2025-02-15T05:16:53Z" }, + { url = "https://files.pythonhosted.org/packages/99/af/ee52a8045426a0c5b89d755a5a70cc821815ef3c333b56fbcad33c4435c0/simplejson-3.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ab19c2da8c043607bde4d4ef3a6b633e668a7d2e3d56f40a476a74c5ea71949f", size = 158840, upload-time = "2025-02-15T05:16:54.851Z" }, + { url = "https://files.pythonhosted.org/packages/68/db/ab32869acea6b5de7d75fa0dac07a112ded795d41eaa7e66c7813b17be95/simplejson-3.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2578bedaedf6294415197b267d4ef678fea336dd78ee2a6d2f4b028e9d07be3", size = 154212, upload-time = "2025-02-15T05:16:56.318Z" }, + { 
url = "https://files.pythonhosted.org/packages/fa/7a/e3132d454977d75a3bf9a6d541d730f76462ebf42a96fea2621498166f41/simplejson-3.20.1-cp312-cp312-win32.whl", hash = "sha256:339f407373325a36b7fd744b688ba5bae0666b5d340ec6d98aebc3014bf3d8ea", size = 74101, upload-time = "2025-02-15T05:16:57.746Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5d/4e243e937fa3560107c69f6f7c2eed8589163f5ed14324e864871daa2dd9/simplejson-3.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:627d4486a1ea7edf1f66bb044ace1ce6b4c1698acd1b05353c97ba4864ea2e17", size = 75736, upload-time = "2025-02-15T05:16:59.017Z" }, + { url = "https://files.pythonhosted.org/packages/c4/03/0f453a27877cb5a5fff16a975925f4119102cc8552f52536b9a98ef0431e/simplejson-3.20.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:71e849e7ceb2178344998cbe5ade101f1b329460243c79c27fbfc51c0447a7c3", size = 93109, upload-time = "2025-02-15T05:17:00.377Z" }, + { url = "https://files.pythonhosted.org/packages/74/1f/a729f4026850cabeaff23e134646c3f455e86925d2533463420635ae54de/simplejson-3.20.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b63fdbab29dc3868d6f009a59797cefaba315fd43cd32ddd998ee1da28e50e29", size = 75475, upload-time = "2025-02-15T05:17:02.544Z" }, + { url = "https://files.pythonhosted.org/packages/e2/14/50a2713fee8ff1f8d655b1a14f4a0f1c0c7246768a1b3b3d12964a4ed5aa/simplejson-3.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1190f9a3ce644fd50ec277ac4a98c0517f532cfebdcc4bd975c0979a9f05e1fb", size = 75112, upload-time = "2025-02-15T05:17:03.875Z" }, + { url = "https://files.pythonhosted.org/packages/45/86/ea9835abb646755140e2d482edc9bc1e91997ed19a59fd77ae4c6a0facea/simplejson-3.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1336ba7bcb722ad487cd265701ff0583c0bb6de638364ca947bb84ecc0015d1", size = 150245, upload-time = "2025-02-15T05:17:06.899Z" }, + { url = 
"https://files.pythonhosted.org/packages/12/b4/53084809faede45da829fe571c65fbda8479d2a5b9c633f46b74124d56f5/simplejson-3.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e975aac6a5acd8b510eba58d5591e10a03e3d16c1cf8a8624ca177491f7230f0", size = 158465, upload-time = "2025-02-15T05:17:08.707Z" }, + { url = "https://files.pythonhosted.org/packages/a9/7d/d56579468d1660b3841e1f21c14490d103e33cf911886b22652d6e9683ec/simplejson-3.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6a6dd11ee282937ad749da6f3b8d87952ad585b26e5edfa10da3ae2536c73078", size = 148514, upload-time = "2025-02-15T05:17:11.323Z" }, + { url = "https://files.pythonhosted.org/packages/19/e3/874b1cca3d3897b486d3afdccc475eb3a09815bf1015b01cf7fcb52a55f0/simplejson-3.20.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab980fcc446ab87ea0879edad41a5c28f2d86020014eb035cf5161e8de4474c6", size = 152262, upload-time = "2025-02-15T05:17:13.543Z" }, + { url = "https://files.pythonhosted.org/packages/32/84/f0fdb3625292d945c2bd13a814584603aebdb38cfbe5fe9be6b46fe598c4/simplejson-3.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f5aee2a4cb6b146bd17333ac623610f069f34e8f31d2f4f0c1a2186e50c594f0", size = 150164, upload-time = "2025-02-15T05:17:15.021Z" }, + { url = "https://files.pythonhosted.org/packages/95/51/6d625247224f01eaaeabace9aec75ac5603a42f8ebcce02c486fbda8b428/simplejson-3.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:652d8eecbb9a3b6461b21ec7cf11fd0acbab144e45e600c817ecf18e4580b99e", size = 151795, upload-time = "2025-02-15T05:17:16.542Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d9/bb921df6b35be8412f519e58e86d1060fddf3ad401b783e4862e0a74c4c1/simplejson-3.20.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8c09948f1a486a89251ee3a67c9f8c969b379f6ffff1a6064b41fea3bce0a112", size = 159027, upload-time = 
"2025-02-15T05:17:18.083Z" }, + { url = "https://files.pythonhosted.org/packages/03/c5/5950605e4ad023a6621cf4c931b29fd3d2a9c1f36be937230bfc83d7271d/simplejson-3.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cbbd7b215ad4fc6f058b5dd4c26ee5c59f72e031dfda3ac183d7968a99e4ca3a", size = 154380, upload-time = "2025-02-15T05:17:20.334Z" }, + { url = "https://files.pythonhosted.org/packages/66/ad/b74149557c5ec1e4e4d55758bda426f5d2ec0123cd01a53ae63b8de51fa3/simplejson-3.20.1-cp313-cp313-win32.whl", hash = "sha256:ae81e482476eaa088ef9d0120ae5345de924f23962c0c1e20abbdff597631f87", size = 74102, upload-time = "2025-02-15T05:17:22.475Z" }, + { url = "https://files.pythonhosted.org/packages/db/a9/25282fdd24493e1022f30b7f5cdf804255c007218b2bfaa655bd7ad34b2d/simplejson-3.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:1b9fd15853b90aec3b1739f4471efbf1ac05066a2c7041bf8db821bb73cd2ddc", size = 75736, upload-time = "2025-02-15T05:17:24.122Z" }, + { url = "https://files.pythonhosted.org/packages/4b/30/00f02a0a921556dd5a6db1ef2926a1bc7a8bbbfb1c49cfed68a275b8ab2b/simplejson-3.20.1-py3-none-any.whl", hash = "sha256:8a6c1bbac39fa4a79f83cbf1df6ccd8ff7069582a9fd8db1e52cea073bc2c697", size = 57121, upload-time = "2025-02-15T05:18:51.243Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -4522,14 +5747,14 @@ wheels = [ [[package]] name = "smart-open" -version = "7.1.0" +version = "7.3.0.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/21/30/1f41c3d3b8cec82024b4b277bfd4e5b18b765ae7279eb9871fa25c503778/smart_open-7.1.0.tar.gz", hash = "sha256:a4f09f84f0f6d3637c6543aca7b5487438877a21360e7368ccf1f704789752ba", size = 72044, upload-time = "2024-12-17T13:19:17.71Z" } +sdist = { url = "https://files.pythonhosted.org/packages/18/2b/5e7234c68ed5bc872ad6ae77b8a421c2ed70dcb1190b44dc1abdeed5e347/smart_open-7.3.0.post1.tar.gz", hash = 
"sha256:ce6a3d9bc1afbf6234ad13c010b77f8cd36d24636811e3c52c3b5160f5214d1e", size = 51557, upload-time = "2025-07-03T10:06:31.271Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/18/9a8d9f01957aa1f8bbc5676d54c2e33102d247e146c1a3679d3bd5cc2e3a/smart_open-7.1.0-py3-none-any.whl", hash = "sha256:4b8489bb6058196258bafe901730c7db0dcf4f083f316e97269c66f45502055b", size = 61746, upload-time = "2024-12-17T13:19:21.076Z" }, + { url = "https://files.pythonhosted.org/packages/08/5b/a2a3d4514c64818925f4e886d39981f1926eeb5288a4549c6b3c17ed66bb/smart_open-7.3.0.post1-py3-none-any.whl", hash = "sha256:c73661a2c24bf045c1e04e08fffc585b59af023fe783d57896f590489db66fb4", size = 61946, upload-time = "2025-07-03T10:06:29.599Z" }, ] [[package]] @@ -4559,6 +5784,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c8/78/3565d011c61f5a43488987ee32b6f3f656e7f107ac2782dd57bdd7d91d9a/snowballstemmer-3.0.1-py3-none-any.whl", hash = "sha256:6cd7b3897da8d6c9ffb968a6781fa6532dce9c3618a4b127d920dab764a19064", size = 103274, upload-time = "2025-05-09T16:34:50.371Z" }, ] +[[package]] +name = "soundfile" +version = "0.13.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cffi" }, + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/41/9b873a8c055582859b239be17902a85339bec6a30ad162f98c9b0288a2cc/soundfile-0.13.1.tar.gz", hash = "sha256:b2c68dab1e30297317080a5b43df57e302584c49e2942defdde0acccc53f0e5b", size = 46156, upload-time = "2025-01-25T09:17:04.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/28/e2a36573ccbcf3d57c00626a21fe51989380636e821b341d36ccca0c1c3a/soundfile-0.13.1-py2.py3-none-any.whl", hash = "sha256:a23c717560da2cf4c7b5ae1142514e0fd82d6bbd9dfc93a50423447142f2c445", size = 25751, upload-time = "2025-01-25T09:16:44.235Z" }, + { url = 
"https://files.pythonhosted.org/packages/ea/ab/73e97a5b3cc46bba7ff8650a1504348fa1863a6f9d57d7001c6b67c5f20e/soundfile-0.13.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:82dc664d19831933fe59adad199bf3945ad06d84bc111a5b4c0d3089a5b9ec33", size = 1142250, upload-time = "2025-01-25T09:16:47.583Z" }, + { url = "https://files.pythonhosted.org/packages/a0/e5/58fd1a8d7b26fc113af244f966ee3aecf03cb9293cb935daaddc1e455e18/soundfile-0.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:743f12c12c4054921e15736c6be09ac26b3b3d603aef6fd69f9dde68748f2593", size = 1101406, upload-time = "2025-01-25T09:16:49.662Z" }, + { url = "https://files.pythonhosted.org/packages/58/ae/c0e4a53d77cf6e9a04179535766b3321b0b9ced5f70522e4caf9329f0046/soundfile-0.13.1-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:9c9e855f5a4d06ce4213f31918653ab7de0c5a8d8107cd2427e44b42df547deb", size = 1235729, upload-time = "2025-01-25T09:16:53.018Z" }, + { url = "https://files.pythonhosted.org/packages/57/5e/70bdd9579b35003a489fc850b5047beeda26328053ebadc1fb60f320f7db/soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:03267c4e493315294834a0870f31dbb3b28a95561b80b134f0bd3cf2d5f0e618", size = 1313646, upload-time = "2025-01-25T09:16:54.872Z" }, + { url = "https://files.pythonhosted.org/packages/fe/df/8c11dc4dfceda14e3003bb81a0d0edcaaf0796dd7b4f826ea3e532146bba/soundfile-0.13.1-py2.py3-none-win32.whl", hash = "sha256:c734564fab7c5ddf8e9be5bf70bab68042cd17e9c214c06e365e20d64f9a69d5", size = 899881, upload-time = "2025-01-25T09:16:56.663Z" }, + { url = "https://files.pythonhosted.org/packages/14/e9/6b761de83277f2f02ded7e7ea6f07828ec78e4b229b80e4ca55dd205b9dc/soundfile-0.13.1-py2.py3-none-win_amd64.whl", hash = "sha256:1e70a05a0626524a69e9f0f4dd2ec174b4e9567f4d8b6c11d38b5c289be36ee9", size = 1019162, upload-time = "2025-01-25T09:16:59.573Z" }, +] + [[package]] name = "soupsieve" version = "2.7" @@ -4568,6 +5812,22 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e7/9c/0e6afc12c269578be5c0c1c9f4b49a8d32770a080260c333ac04cc1c832d/soupsieve-2.7-py3-none-any.whl", hash = "sha256:6e60cc5c1ffaf1cebcc12e8188320b72071e922c2e897f737cadce79ad5d30c4", size = 36677, upload-time = "2025-04-20T18:50:07.196Z" }, ] +[[package]] +name = "soxr" +version = "0.5.0.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/02/c0/4429bf9b3be10e749149e286aa5c53775399ec62891c6b970456c6dca325/soxr-0.5.0.post1.tar.gz", hash = "sha256:7092b9f3e8a416044e1fa138c8172520757179763b85dc53aa9504f4813cff73", size = 170853, upload-time = "2024-08-31T03:43:33.058Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/e3/d422d279e51e6932e7b64f1170a4f61a7ee768e0f84c9233a5b62cd2c832/soxr-0.5.0.post1-cp312-abi3-macosx_10_14_x86_64.whl", hash = "sha256:fef509466c9c25f65eae0ce1e4b9ac9705d22c6038c914160ddaf459589c6e31", size = 199993, upload-time = "2024-08-31T03:43:17.24Z" }, + { url = "https://files.pythonhosted.org/packages/20/f1/88adaca3c52e03bcb66b63d295df2e2d35bf355d19598c6ce84b20be7fca/soxr-0.5.0.post1-cp312-abi3-macosx_11_0_arm64.whl", hash = "sha256:4704ba6b13a3f1e41d12acf192878384c1c31f71ce606829c64abdf64a8d7d32", size = 156373, upload-time = "2024-08-31T03:43:18.633Z" }, + { url = "https://files.pythonhosted.org/packages/b8/38/bad15a9e615215c8219652ca554b601663ac3b7ac82a284aca53ec2ff48c/soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd052a66471a7335b22a6208601a9d0df7b46b8d087dce4ff6e13eed6a33a2a1", size = 216564, upload-time = "2024-08-31T03:43:20.789Z" }, + { url = "https://files.pythonhosted.org/packages/e1/1a/569ea0420a0c4801c2c8dd40d8d544989522f6014d51def689125f3f2935/soxr-0.5.0.post1-cp312-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f16810dd649ab1f433991d2a9661e9e6a116c2b4101039b53b3c3e90a094fc", size = 248455, upload-time = 
"2024-08-31T03:43:22.165Z" }, + { url = "https://files.pythonhosted.org/packages/bc/10/440f1ba3d4955e0dc740bbe4ce8968c254a3d644d013eb75eea729becdb8/soxr-0.5.0.post1-cp312-abi3-win_amd64.whl", hash = "sha256:b1be9fee90afb38546bdbd7bde714d1d9a8c5a45137f97478a83b65e7f3146f6", size = 164937, upload-time = "2024-08-31T03:43:23.671Z" }, +] + [[package]] name = "sphinx" version = "8.2.3" @@ -4638,6 +5898,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/48/1ea60e74949eecb12cdd6ac43987f9fd331156388dcc2319b45e2ebb81bf/sphinx_copybutton-0.5.2-py3-none-any.whl", hash = "sha256:fb543fd386d917746c9a2c50360c7905b605726b9355cd26e9974857afeae06e", size = 13343, upload-time = "2023-04-14T08:10:20.844Z" }, ] +[[package]] +name = "sphinx-design" +version = "0.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/69/b34e0cb5336f09c6866d53b4a19d76c227cdec1bbc7ac4de63ca7d58c9c7/sphinx_design-0.6.1.tar.gz", hash = "sha256:b44eea3719386d04d765c1a8257caca2b3e6f8421d7b3a5e742c0fd45f84e632", size = 2193689, upload-time = "2024-08-02T13:48:44.277Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/43/65c0acbd8cc6f50195a3a1fc195c404988b15c67090e73c7a41a9f57d6bd/sphinx_design-0.6.1-py3-none-any.whl", hash = "sha256:b11f37db1a802a183d61b159d9a202314d4d2fe29c163437001324fe2f19549c", size = 2215338, upload-time = "2024-08-02T13:48:42.106Z" }, +] + [[package]] name = "sphinxcontrib-applehelp" version = "2.0.0" @@ -4674,6 +5946,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, ] +[[package]] +name = "sphinxcontrib-mermaid" +version = "1.0.0" +source = { registry = "https://pypi.org/simple" } 
+dependencies = [ + { name = "pyyaml" }, + { name = "sphinx" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/97/69/bf039237ad260073e8c02f820b3e00dc34f3a2de20aff7861e6b19d2f8c5/sphinxcontrib_mermaid-1.0.0.tar.gz", hash = "sha256:2e8ab67d3e1e2816663f9347d026a8dee4a858acdd4ad32dd1c808893db88146", size = 15153, upload-time = "2024-10-12T16:33:03.863Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cd/c8/784b9ac6ea08aa594c1a4becbd0dbe77186785362e31fd633b8c6ae0197a/sphinxcontrib_mermaid-1.0.0-py3-none-any.whl", hash = "sha256:60b72710ea02087f212028feb09711225fbc2e343a10d34822fe787510e1caa3", size = 9597, upload-time = "2024-10-12T16:33:02.303Z" }, +] + [[package]] name = "sphinxcontrib-qthelp" version = "2.0.0" @@ -4694,31 +5979,31 @@ wheels = [ [[package]] name = "sqlalchemy" -version = "2.0.41" +version = "2.0.43" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "greenlet", marker = "(python_full_version < '3.14' and platform_machine == 'AMD64') or (python_full_version < '3.14' and platform_machine == 'WIN32') or (python_full_version < '3.14' and platform_machine == 'aarch64') or (python_full_version < '3.14' and platform_machine == 'amd64') or (python_full_version < '3.14' and platform_machine == 'ppc64le') or (python_full_version < '3.14' and platform_machine == 'win32') or (python_full_version < '3.14' and platform_machine == 'x86_64')" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/63/66/45b165c595ec89aa7dcc2c1cd222ab269bc753f1fc7a1e68f8481bd957bf/sqlalchemy-2.0.41.tar.gz", hash = "sha256:edba70118c4be3c2b1f90754d308d0b79c6fe2c0fdc52d8ddf603916f83f4db9", size = 9689424, upload-time = "2025-05-14T17:10:32.339Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/bc/d59b5d97d27229b0e009bd9098cd81af71c2fa5549c580a0a67b9bed0496/sqlalchemy-2.0.43.tar.gz", hash = "sha256:788bfcef6787a7764169cfe9859fe425bf44559619e1d9f56f5bddf2ebf6f417", size = 9762949, 
upload-time = "2025-08-11T14:24:58.438Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/2a/f1f4e068b371154740dd10fb81afb5240d5af4aa0087b88d8b308b5429c2/sqlalchemy-2.0.41-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:81f413674d85cfd0dfcd6512e10e0f33c19c21860342a4890c3a2b59479929f9", size = 2119645, upload-time = "2025-05-14T17:55:24.854Z" }, - { url = "https://files.pythonhosted.org/packages/9b/e8/c664a7e73d36fbfc4730f8cf2bf930444ea87270f2825efbe17bf808b998/sqlalchemy-2.0.41-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:598d9ebc1e796431bbd068e41e4de4dc34312b7aa3292571bb3674a0cb415dd1", size = 2107399, upload-time = "2025-05-14T17:55:28.097Z" }, - { url = "https://files.pythonhosted.org/packages/5c/78/8a9cf6c5e7135540cb682128d091d6afa1b9e48bd049b0d691bf54114f70/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a104c5694dfd2d864a6f91b0956eb5d5883234119cb40010115fd45a16da5e70", size = 3293269, upload-time = "2025-05-14T17:50:38.227Z" }, - { url = "https://files.pythonhosted.org/packages/3c/35/f74add3978c20de6323fb11cb5162702670cc7a9420033befb43d8d5b7a4/sqlalchemy-2.0.41-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6145afea51ff0af7f2564a05fa95eb46f542919e6523729663a5d285ecb3cf5e", size = 3303364, upload-time = "2025-05-14T17:51:49.829Z" }, - { url = "https://files.pythonhosted.org/packages/6a/d4/c990f37f52c3f7748ebe98883e2a0f7d038108c2c5a82468d1ff3eec50b7/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b46fa6eae1cd1c20e6e6f44e19984d438b6b2d8616d21d783d150df714f44078", size = 3229072, upload-time = "2025-05-14T17:50:39.774Z" }, - { url = "https://files.pythonhosted.org/packages/15/69/cab11fecc7eb64bc561011be2bd03d065b762d87add52a4ca0aca2e12904/sqlalchemy-2.0.41-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41836fe661cc98abfae476e14ba1906220f92c4e528771a8a3ae6a151242d2ae", size = 3268074, upload-time = "2025-05-14T17:51:51.736Z" }, 
- { url = "https://files.pythonhosted.org/packages/5c/ca/0c19ec16858585d37767b167fc9602593f98998a68a798450558239fb04a/sqlalchemy-2.0.41-cp312-cp312-win32.whl", hash = "sha256:a8808d5cf866c781150d36a3c8eb3adccfa41a8105d031bf27e92c251e3969d6", size = 2084514, upload-time = "2025-05-14T17:55:49.915Z" }, - { url = "https://files.pythonhosted.org/packages/7f/23/4c2833d78ff3010a4e17f984c734f52b531a8c9060a50429c9d4b0211be6/sqlalchemy-2.0.41-cp312-cp312-win_amd64.whl", hash = "sha256:5b14e97886199c1f52c14629c11d90c11fbb09e9334fa7bb5f6d068d9ced0ce0", size = 2111557, upload-time = "2025-05-14T17:55:51.349Z" }, - { url = "https://files.pythonhosted.org/packages/d3/ad/2e1c6d4f235a97eeef52d0200d8ddda16f6c4dd70ae5ad88c46963440480/sqlalchemy-2.0.41-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4eeb195cdedaf17aab6b247894ff2734dcead6c08f748e617bfe05bd5a218443", size = 2115491, upload-time = "2025-05-14T17:55:31.177Z" }, - { url = "https://files.pythonhosted.org/packages/cf/8d/be490e5db8400dacc89056f78a52d44b04fbf75e8439569d5b879623a53b/sqlalchemy-2.0.41-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d4ae769b9c1c7757e4ccce94b0641bc203bbdf43ba7a2413ab2523d8d047d8dc", size = 2102827, upload-time = "2025-05-14T17:55:34.921Z" }, - { url = "https://files.pythonhosted.org/packages/a0/72/c97ad430f0b0e78efaf2791342e13ffeafcbb3c06242f01a3bb8fe44f65d/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a62448526dd9ed3e3beedc93df9bb6b55a436ed1474db31a2af13b313a70a7e1", size = 3225224, upload-time = "2025-05-14T17:50:41.418Z" }, - { url = "https://files.pythonhosted.org/packages/5e/51/5ba9ea3246ea068630acf35a6ba0d181e99f1af1afd17e159eac7e8bc2b8/sqlalchemy-2.0.41-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc56c9788617b8964ad02e8fcfeed4001c1f8ba91a9e1f31483c0dffb207002a", size = 3230045, upload-time = "2025-05-14T17:51:54.722Z" }, - { url = 
"https://files.pythonhosted.org/packages/78/2f/8c14443b2acea700c62f9b4a8bad9e49fc1b65cfb260edead71fd38e9f19/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c153265408d18de4cc5ded1941dcd8315894572cddd3c58df5d5b5705b3fa28d", size = 3159357, upload-time = "2025-05-14T17:50:43.483Z" }, - { url = "https://files.pythonhosted.org/packages/fc/b2/43eacbf6ccc5276d76cea18cb7c3d73e294d6fb21f9ff8b4eef9b42bbfd5/sqlalchemy-2.0.41-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f67766965996e63bb46cfbf2ce5355fc32d9dd3b8ad7e536a920ff9ee422e23", size = 3197511, upload-time = "2025-05-14T17:51:57.308Z" }, - { url = "https://files.pythonhosted.org/packages/fa/2e/677c17c5d6a004c3c45334ab1dbe7b7deb834430b282b8a0f75ae220c8eb/sqlalchemy-2.0.41-cp313-cp313-win32.whl", hash = "sha256:bfc9064f6658a3d1cadeaa0ba07570b83ce6801a1314985bf98ec9b95d74e15f", size = 2082420, upload-time = "2025-05-14T17:55:52.69Z" }, - { url = "https://files.pythonhosted.org/packages/e9/61/e8c1b9b6307c57157d328dd8b8348ddc4c47ffdf1279365a13b2b98b8049/sqlalchemy-2.0.41-cp313-cp313-win_amd64.whl", hash = "sha256:82ca366a844eb551daff9d2e6e7a9e5e76d2612c8564f58db6c19a726869c1df", size = 2108329, upload-time = "2025-05-14T17:55:54.495Z" }, - { url = "https://files.pythonhosted.org/packages/1c/fc/9ba22f01b5cdacc8f5ed0d22304718d2c758fce3fd49a5372b886a86f37c/sqlalchemy-2.0.41-py3-none-any.whl", hash = "sha256:57df5dc6fdb5ed1a88a1ed2195fd31927e705cad62dedd86b46972752a80f576", size = 1911224, upload-time = "2025-05-14T17:39:42.154Z" }, + { url = "https://files.pythonhosted.org/packages/61/db/20c78f1081446095450bdc6ee6cc10045fce67a8e003a5876b6eaafc5cc4/sqlalchemy-2.0.43-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:20d81fc2736509d7a2bd33292e489b056cbae543661bb7de7ce9f1c0cd6e7f24", size = 2134891, upload-time = "2025-08-11T15:51:13.019Z" }, + { url = 
"https://files.pythonhosted.org/packages/45/0a/3d89034ae62b200b4396f0f95319f7d86e9945ee64d2343dcad857150fa2/sqlalchemy-2.0.43-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b9fc27650ff5a2c9d490c13c14906b918b0de1f8fcbb4c992712d8caf40e83", size = 2123061, upload-time = "2025-08-11T15:51:14.319Z" }, + { url = "https://files.pythonhosted.org/packages/cb/10/2711f7ff1805919221ad5bee205971254845c069ee2e7036847103ca1e4c/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6772e3ca8a43a65a37c88e2f3e2adfd511b0b1da37ef11ed78dea16aeae85bd9", size = 3320384, upload-time = "2025-08-11T15:52:35.088Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0e/3d155e264d2ed2778484006ef04647bc63f55b3e2d12e6a4f787747b5900/sqlalchemy-2.0.43-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a113da919c25f7f641ffbd07fbc9077abd4b3b75097c888ab818f962707eb48", size = 3329648, upload-time = "2025-08-11T15:56:34.153Z" }, + { url = "https://files.pythonhosted.org/packages/5b/81/635100fb19725c931622c673900da5efb1595c96ff5b441e07e3dd61f2be/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4286a1139f14b7d70141c67a8ae1582fc2b69105f1b09d9573494eb4bb4b2687", size = 3258030, upload-time = "2025-08-11T15:52:36.933Z" }, + { url = "https://files.pythonhosted.org/packages/0c/ed/a99302716d62b4965fded12520c1cbb189f99b17a6d8cf77611d21442e47/sqlalchemy-2.0.43-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:529064085be2f4d8a6e5fab12d36ad44f1909a18848fcfbdb59cc6d4bbe48efe", size = 3294469, upload-time = "2025-08-11T15:56:35.553Z" }, + { url = "https://files.pythonhosted.org/packages/5d/a2/3a11b06715149bf3310b55a98b5c1e84a42cfb949a7b800bc75cb4e33abc/sqlalchemy-2.0.43-cp312-cp312-win32.whl", hash = "sha256:b535d35dea8bbb8195e7e2b40059e2253acb2b7579b73c1b432a35363694641d", size = 2098906, upload-time = "2025-08-11T15:55:00.645Z" }, + { url = 
"https://files.pythonhosted.org/packages/bc/09/405c915a974814b90aa591280623adc6ad6b322f61fd5cff80aeaef216c9/sqlalchemy-2.0.43-cp312-cp312-win_amd64.whl", hash = "sha256:1c6d85327ca688dbae7e2b06d7d84cfe4f3fffa5b5f9e21bb6ce9d0e1a0e0e0a", size = 2126260, upload-time = "2025-08-11T15:55:02.965Z" }, + { url = "https://files.pythonhosted.org/packages/41/1c/a7260bd47a6fae7e03768bf66451437b36451143f36b285522b865987ced/sqlalchemy-2.0.43-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e7c08f57f75a2bb62d7ee80a89686a5e5669f199235c6d1dac75cd59374091c3", size = 2130598, upload-time = "2025-08-11T15:51:15.903Z" }, + { url = "https://files.pythonhosted.org/packages/8e/84/8a337454e82388283830b3586ad7847aa9c76fdd4f1df09cdd1f94591873/sqlalchemy-2.0.43-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:14111d22c29efad445cd5021a70a8b42f7d9152d8ba7f73304c4d82460946aaa", size = 2118415, upload-time = "2025-08-11T15:51:17.256Z" }, + { url = "https://files.pythonhosted.org/packages/cf/ff/22ab2328148492c4d71899d62a0e65370ea66c877aea017a244a35733685/sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21b27b56eb2f82653168cefe6cb8e970cdaf4f3a6cb2c5e3c3c1cf3158968ff9", size = 3248707, upload-time = "2025-08-11T15:52:38.444Z" }, + { url = "https://files.pythonhosted.org/packages/dc/29/11ae2c2b981de60187f7cbc84277d9d21f101093d1b2e945c63774477aba/sqlalchemy-2.0.43-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c5a9da957c56e43d72126a3f5845603da00e0293720b03bde0aacffcf2dc04f", size = 3253602, upload-time = "2025-08-11T15:56:37.348Z" }, + { url = "https://files.pythonhosted.org/packages/b8/61/987b6c23b12c56d2be451bc70900f67dd7d989d52b1ee64f239cf19aec69/sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d79f9fdc9584ec83d1b3c75e9f4595c49017f5594fee1a2217117647225d738", size = 3183248, upload-time = "2025-08-11T15:52:39.865Z" }, + { url = 
"https://files.pythonhosted.org/packages/86/85/29d216002d4593c2ce1c0ec2cec46dda77bfbcd221e24caa6e85eff53d89/sqlalchemy-2.0.43-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9df7126fd9db49e3a5a3999442cc67e9ee8971f3cb9644250107d7296cb2a164", size = 3219363, upload-time = "2025-08-11T15:56:39.11Z" }, + { url = "https://files.pythonhosted.org/packages/b6/e4/bd78b01919c524f190b4905d47e7630bf4130b9f48fd971ae1c6225b6f6a/sqlalchemy-2.0.43-cp313-cp313-win32.whl", hash = "sha256:7f1ac7828857fcedb0361b48b9ac4821469f7694089d15550bbcf9ab22564a1d", size = 2096718, upload-time = "2025-08-11T15:55:05.349Z" }, + { url = "https://files.pythonhosted.org/packages/ac/a5/ca2f07a2a201f9497de1928f787926613db6307992fe5cda97624eb07c2f/sqlalchemy-2.0.43-cp313-cp313-win_amd64.whl", hash = "sha256:971ba928fcde01869361f504fcff3b7143b47d30de188b11c6357c0505824197", size = 2123200, upload-time = "2025-08-11T15:55:07.932Z" }, + { url = "https://files.pythonhosted.org/packages/b8/d9/13bdde6521f322861fab67473cec4b1cc8999f3871953531cf61945fad92/sqlalchemy-2.0.43-py3-none-any.whl", hash = "sha256:1681c21dd2ccee222c2fe0bef671d1aef7c504087c9c4e800371cfcc8ac966fc", size = 1924759, upload-time = "2025-08-11T15:39:53.024Z" }, ] [[package]] @@ -4732,14 +6017,69 @@ wheels = [ [[package]] name = "starlette" -version = "0.46.2" +version = "0.47.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/57/d062573f391d062710d4088fa1369428c38d51460ab6fedff920efef932e/starlette-0.47.2.tar.gz", hash = "sha256:6ae9aa5db235e4846decc1e7b79c4f346adf41e9777aebeb49dfd09bbd7023d8", size = 2583948, upload-time = "2025-07-20T17:31:58.522Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/1f/b876b1f83aef204198a42dc101613fefccb32258e5428b5f9259677864b4/starlette-0.47.2-py3-none-any.whl", hash = 
"sha256:c5847e96134e5c5371ee9fac6fdf1a67336d5815e09eb2a01fdb57a351ef915b", size = 72984, upload-time = "2025-07-20T17:31:56.738Z" }, +] + +[[package]] +name = "swagger-plugin-for-sphinx" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "docutils" }, + { name = "jinja2" }, + { name = "sphinx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/b3/84bc7d8af6b50e6c365f1593fe4a01245f4bfb2178040261c6429db0b46b/swagger_plugin_for_sphinx-6.0.0.tar.gz", hash = "sha256:70366c610648cede5ef482922c9c97c86c99746b9edf33e4ec13fab23d820251", size = 16026, upload-time = "2025-10-16T06:26:09.95Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/51/f0/1098f6628bbe04b086ce59692d09b116ec751286eb7d33e88c5bf0c2e210/swagger_plugin_for_sphinx-6.0.0-py3-none-any.whl", hash = "sha256:35dc646d759a44ce78aefde2fe34f54e7b8c3439d0a52541a6a8b9924a711832", size = 11253, upload-time = "2025-10-16T06:26:08.504Z" }, +] + +[[package]] +name = "swankit" +version = "0.2.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/dd/b8/025612465e22f77df120401055f366a66877fd26f52d718ea558f65814bd/swankit-0.2.4.tar.gz", hash = "sha256:ed89b7d23351f4038930da78d34195604d76c2285c31586ecf1a4cb9fad9c33d", size = 21641, upload-time = "2025-06-13T17:48:03.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/c7/7cc8d6bc562ce96d751a7655421eae09ba795cd557ed4791d63a72bd8f9a/swankit-0.2.4-py3-none-any.whl", hash = "sha256:8e6d3a50451ed1f708f375b839964c5815fe7152a39ed7588921d67c1ba469f8", size = 23853, upload-time = "2025-06-13T17:48:02.996Z" }, +] + +[[package]] +name = "swanlab" +version = "0.6.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "boto3" }, + { name = "botocore" }, + { name = "click" }, + { name = "platformdirs" }, + { name = "protobuf" }, + { 
name = "psutil" }, + { name = "pydantic" }, + { name = "pyecharts" }, + { name = "pynvml" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "rich" }, + { name = "setuptools" }, + { name = "swankit" }, + { name = "urllib3" }, + { name = "wrapt" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ce/20/08dfcd9c983f6a6f4a1000d934b9e6d626cff8d2eeb77a89a68eef20a2b7/starlette-0.46.2.tar.gz", hash = "sha256:7f7361f34eed179294600af672f565727419830b54b7b084efe44bb82d2fccd5", size = 2580846, upload-time = "2025-04-13T13:56:17.942Z" } +sdist = { url = "https://files.pythonhosted.org/packages/47/c7/87c4084b43c65d0daa3c324e634b4c1fb2b10453515f35f914f87932841e/swanlab-0.6.10.tar.gz", hash = "sha256:0abb3a2bdd5915349fd161f43f3dcc9e79b399affc87b46451d86ee140cab9e2", size = 422149, upload-time = "2025-09-12T03:01:26.448Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/0c/9d30a4ebeb6db2b25a841afbb80f6ef9a854fc3b41be131d249a977b4959/starlette-0.46.2-py3-none-any.whl", hash = "sha256:595633ce89f8ffa71a015caed34a5b2dc1c0cdb3f0f1fbd1e69339cf2abeec35", size = 72037, upload-time = "2025-04-13T13:56:16.21Z" }, + { url = "https://files.pythonhosted.org/packages/3c/c3/e80a5c631d1e676807bed2c670bee27a6482840e72a3953f4e5cc26477c6/swanlab-0.6.10-py3-none-any.whl", hash = "sha256:e7f836c27c41819844755c942d9a3c8be63ae0a19996d6bab3934fb2c8cb77fb", size = 292190, upload-time = "2025-09-12T03:01:24.428Z" }, ] [[package]] @@ -4755,17 +6095,22 @@ wheels = [ ] [[package]] -name = "tabulate" -version = "0.9.0" +name = "tdigest" +version = "0.5.2.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +dependencies = [ + { name = "accumulation-tree" }, + { name = "pyudorandom" }, +] 
+sdist = { url = "https://files.pythonhosted.org/packages/dd/34/7e2f78d1ed0af7d0039ab2cff45b6bf8512234b9f178bb21713084a1f2f0/tdigest-0.5.2.2.tar.gz", hash = "sha256:8deffc8bac024761786f43d9444e3b6c91008cd690323e051f068820a7364d0e", size = 6549, upload-time = "2019-05-07T18:57:40.771Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, + { url = "https://files.pythonhosted.org/packages/32/72/f420480118cbdd18eb761b9936f0a927957130659a638449575b4a4f0aa7/tdigest-0.5.2.2-py2.py3-none-any.whl", hash = "sha256:e32ff6ab62e4defdb93b816c831080d94dfa1efb68a9fa1e7976c237fa9375cb", size = 9445, upload-time = "2019-05-07T18:57:37.493Z" }, + { url = "https://files.pythonhosted.org/packages/b4/94/fd3853b98f39d10206b08f2737d2ec2dc6f46a42dc7b7e05f4f0162d13ee/tdigest-0.5.2.2-py3-none-any.whl", hash = "sha256:dd25f8d6e6be002192bba9e4b8c16491d36c10b389f50637818603d1f67c6fb2", size = 9440, upload-time = "2019-05-07T18:57:38.942Z" }, ] [[package]] name = "tensorboard" -version = "2.19.0" +version = "2.20.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "absl-py" }, @@ -4773,14 +6118,14 @@ dependencies = [ { name = "markdown" }, { name = "numpy" }, { name = "packaging" }, + { name = "pillow" }, { name = "protobuf" }, { name = "setuptools" }, - { name = "six" }, { name = "tensorboard-data-server" }, { name = "werkzeug" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/12/4f70e8e2ba0dbe72ea978429d8530b0333f0ed2140cc571a48802878ef99/tensorboard-2.19.0-py3-none-any.whl", hash = "sha256:5e71b98663a641a7ce8a6e70b0be8e1a4c0c45d48760b076383ac4755c35b9a0", size = 5503412, upload-time = "2025-02-12T08:17:27.21Z" }, + { url = 
"https://files.pythonhosted.org/packages/9c/d9/a5db55f88f258ac669a92858b70a714bbbd5acd993820b41ec4a96a4d77f/tensorboard-2.20.0-py3-none-any.whl", hash = "sha256:9dc9f978cb84c0723acf9a345d96c184f0293d18f166bb8d59ee098e6cfaaba6", size = 5525680, upload-time = "2025-07-17T19:20:49.638Z" }, ] [[package]] @@ -4795,24 +6140,62 @@ wheels = [ [[package]] name = "tensorstore" -version = "0.1.75" +version = "0.1.74" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "numpy", marker = "python_full_version >= '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/b9/ea25aba62c688a87d7d7d9cc5926d602e2f9e84fa72586825486fb180b7e/tensorstore-0.1.74.tar.gz", hash = "sha256:a062875f27283d30ce4959c408c253ecb336fce8e3f9837c064e3d30cda79203", size = 6795605, upload-time = "2025-04-24T15:42:18.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/14/2e6d1cad744af9e9a1a78d881a908a859ad95b61b15de10397069f55fbd8/tensorstore-0.1.74-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7218722ee5d74e4d01f357917d3b1b7b1d6b1c068aa73e3d801cb3d58fc45116", size = 15334307, upload-time = "2025-04-24T15:41:48.315Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/ac/8d572b8c6d689eb50db0252e9d35ee6278a6aed481b64d7e025cf51e32c4/tensorstore-0.1.74-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6926554a8633d0210bdba619d3996fff6a6af4214237fbca626e6ddfcc8ea39", size = 13288669, upload-time = "2025-04-24T15:41:50.808Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6c/3e76d614ad70b61670686d91abaa3ddee6b01255bf2b40f050beb15b7970/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d584e468eb4ef8195f5d21a9da4780cf96c6074b87ef219b43a89efce3d503ca", size = 17031720, upload-time = "2025-04-24T15:41:55.092Z" }, + { url = "https://files.pythonhosted.org/packages/31/f3/09d7c3ad7c9517f89b5be9b4460b83333e98dce1c9ab0a52464ded0bab67/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0af2225431d59f8a2bb4db4c1519252f10ee407e6550875d78212d3d34ee743", size = 18378829, upload-time = "2025-04-24T15:41:58.167Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f2/45ece38705280ed9ebf4ccaf084ed1e76e35b1eeec8c510e589978ac8dcd/tensorstore-0.1.74-cp312-cp312-win_amd64.whl", hash = "sha256:4e35f3679873cdc488aae20b9ae2cea4589c7b147a80edb07eb3f09eba47d43d", size = 12432300, upload-time = "2025-04-24T15:42:00.761Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e9/a08c6a6eb7d6b4b26053d4575196a06c6fccf4e89f9bc625f81e7c91bb5d/tensorstore-0.1.74-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:f7d2c80de9ab352ca14aeca798d6650c5670725e6f8eac73f4fcc8f3147ca614", size = 15334469, upload-time = "2025-04-24T15:42:03.731Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a9/64b90c6e66e0b8043e641090144c6614b0c78d9a719b9110d953d13a516d/tensorstore-0.1.74-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ceef7d2dcfd1caf61356f7eeb9a37896b4825b4be2750b00615cf5fb1ae47a8b", size = 13288791, upload-time = "2025-04-24T15:42:06.145Z" }, + { url = 
"https://files.pythonhosted.org/packages/62/e8/226cfc25d7eac00e783ff2ee4994830c4a42cd8690e207c4a8b93210f3d9/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e71637002a806bc1b0f0f05556d1c33493a43f3ab35f9632b3d48855677d93dc", size = 17031815, upload-time = "2025-04-24T15:42:09.239Z" }, + { url = "https://files.pythonhosted.org/packages/9a/09/dce8a0942d84f6bb039b5ea3e8bc6a479b1a9535cd216b0d42dd03c4f761/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c799edf9000aee68d6676e3d2f73d4e1a56fc817c47e150732f6d3bd2b1ef46d", size = 18378091, upload-time = "2025-04-24T15:42:13.546Z" }, + { url = "https://files.pythonhosted.org/packages/a6/23/5218575d25de9d8debfb3faf290a1e3b9a7b6be9e77ba07ff3a63a0bc899/tensorstore-0.1.74-cp313-cp313-win_amd64.whl", hash = "sha256:5da86437ffa1ee0f0c590c38daa2f4b548890ce66b1f470ac98714cb0eabdbf5", size = 12432635, upload-time = "2025-04-24T15:42:16.275Z" }, +] + +[[package]] +name = "tensorstore" +version = "0.1.76" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "ml-dtypes" }, - { name = "numpy" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy", marker = "python_full_version < 
'3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/00/4e/5be077c63d01af420ca8a009cad3b30fef137ef37f6530c266f4f2628382/tensorstore-0.1.75.tar.gz", hash = "sha256:515cc90f5b6c316443f44794168083326fb29a0e50b0cd8fbd4cb3e0f32a3922", size = 6831417, upload-time = "2025-05-14T00:38:05.037Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/ae/947a9f232de7319b664ed8d278e9e0363e9294da73fd422c687ac4eb070e/tensorstore-0.1.76.tar.gz", hash = "sha256:ed0d565e7a038a84b1b5b5d9f7397caec200b53941d8889f44b7f63dd6abffe7", size = 6869230, upload-time = "2025-07-02T21:34:03.773Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ac/fb/28a5f8035cadbae34bdcaf03a8e0d731fd8bc8c9804ed8f54413cbfddeda/tensorstore-0.1.75-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:bc092152673a993df1867bac16622f5f382816184f2244df9ff78ba7f781e642", size = 15644019, upload-time = "2025-05-14T00:37:41.892Z" }, - { url = "https://files.pythonhosted.org/packages/16/52/b289ac969d7cee8c253b2f90e5cd6b37789f704147ff7fffa8a50e7b97c4/tensorstore-0.1.75-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d5c6c8ef6c6758f7e11a4cbc7fc4e23af5170128901df729185b7870f6dbc071", size = 13557511, upload-time = "2025-05-14T00:37:44.508Z" }, - { url = "https://files.pythonhosted.org/packages/35/50/a2c4271e2512ace24290d2d7cf166aaf6e251ef14d20255d98a96c6a9514/tensorstore-0.1.75-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8bbb30f24aef98d43657d132833f5577bfa91497769ef6b5238c5faccf7afe35", size = 17454887, upload-time = "2025-05-14T00:37:46.918Z" }, - { url = "https://files.pythonhosted.org/packages/a3/1d/9b2610a0770a2115e4a20c1a9377e2e14efabeb55852d150832ff82346f4/tensorstore-0.1.75-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f08d9c2b8b84c892c3c81f6025ec189f58bd7860bf624c32646e5bee81870f95", size = 18820501, upload-time = "2025-05-14T00:37:49.022Z" }, - { url = 
"https://files.pythonhosted.org/packages/b9/9c/06f7318bd56fe62ccd7743159cd9e133b5e0ead5b8b229a6f1f392e65666/tensorstore-0.1.75-cp312-cp312-win_amd64.whl", hash = "sha256:39d4173bdbbc1cf41e168fe730fd457a6b0c4100ba707254260f63cb9ad3ef0b", size = 12607424, upload-time = "2025-05-14T00:37:51.368Z" }, - { url = "https://files.pythonhosted.org/packages/c1/97/656252b262099fdc8b3f247c58ec147ba644f4fc4dec8f7af3ffb352704e/tensorstore-0.1.75-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:0d2f87ca268faf903d5ffba6157fd9aeb42e9f961cea01b98baa690f71f51a1e", size = 15644856, upload-time = "2025-05-14T00:37:53.28Z" }, - { url = "https://files.pythonhosted.org/packages/94/e1/66067a2aa5c2890c02397df65d748978de1dbbe91ce394f285f86390c149/tensorstore-0.1.75-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:17ee80f9869b5a4b0303cb37edca9c9665af7a9510fac85f59fb8de19f12efd1", size = 13557924, upload-time = "2025-05-14T00:37:55.249Z" }, - { url = "https://files.pythonhosted.org/packages/46/56/c1245f7bb674072bb0f9e8516bd60f7608ffe114e911c08ebcaefca58f46/tensorstore-0.1.75-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f00144d23eaf511104651b4479fcb111b9befc13db3018277d358144be503ef4", size = 17454695, upload-time = "2025-05-14T00:37:58.521Z" }, - { url = "https://files.pythonhosted.org/packages/db/78/8a103a9012662fb8d85c3d6daa9c9678d49f260a21b5426e0a1616e63c42/tensorstore-0.1.75-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c8697cab7b24440a13df8d9e6d000c1067ed3f97204a3dae5388e9e60606834", size = 18820794, upload-time = "2025-05-14T00:38:01.253Z" }, - { url = "https://files.pythonhosted.org/packages/7d/3d/69d7997fe67fd9cb8fce07ea0f3f3e754a6ea0d2c16f1c46e178abe7da0e/tensorstore-0.1.75-cp313-cp313-win_amd64.whl", hash = "sha256:df410ca28e679c1c8a5b361267ce02fe60a9c4d78964cb984d884d15c538f2f2", size = 12607428, upload-time = "2025-05-14T00:38:03.32Z" }, + { url = 
"https://files.pythonhosted.org/packages/09/37/f2254b4ae1dabd95e258fa3eb4783ac4db4261bb8c90ff9bfe15549d1238/tensorstore-0.1.76-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:b68450983ccad9e7774e81b2fa37daef1b72c774fd939d9eb4065d6aa70e666a", size = 15712650, upload-time = "2025-07-02T21:33:39.716Z" }, + { url = "https://files.pythonhosted.org/packages/93/3c/1cae56cbbe9610ff48cb2d7c0921a4d4c333a0540918e3b2db08b521c5f6/tensorstore-0.1.76-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b7a3856f884279e40f90bad87d0da70869879e124835e650c6b16c80f64fbc4", size = 13624138, upload-time = "2025-07-02T21:33:41.758Z" }, + { url = "https://files.pythonhosted.org/packages/ba/d2/b92d34a896f608a59dc76c290d4ec9f7d0264a02e4d74864987a6adbd3c9/tensorstore-0.1.76-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8709a98ae0b453eb23525c07372c2be1f6bbd978bba53319f26a1f2a83a77c2a", size = 17538270, upload-time = "2025-07-02T21:33:44.911Z" }, + { url = "https://files.pythonhosted.org/packages/21/66/142b803541552b02a2fa033b1f48bcb50e1d2df6ac10131aab1857c5141d/tensorstore-0.1.76-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:267edea8f1596f2bd67017ff97b7b350bf3f95ff84947a8babadc5e17ca53663", size = 18910782, upload-time = "2025-07-02T21:33:47.401Z" }, + { url = "https://files.pythonhosted.org/packages/5a/3e/c264cf1435c04fb998a1f30dd1f066deb370b841412f89e1cb36d37ee4fc/tensorstore-0.1.76-cp312-cp312-win_amd64.whl", hash = "sha256:f66ac63d0c63c3336ac4dc61f1f97b6afe8b512e586ddfdbc91f19175787f321", size = 12611059, upload-time = "2025-07-02T21:33:49.596Z" }, + { url = "https://files.pythonhosted.org/packages/5f/66/1e3b819e1de98b048dad7843f3a814c5e739ead57f511dafb6aa0748f04a/tensorstore-0.1.76-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:a471994b156daa3cadb0e4968e29202fa2e8c7ddcd28d825499bb5637caa0983", size = 15713110, upload-time = "2025-07-02T21:33:51.973Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/d3/226344e8822c5e02af929c89bd61964e08980253cda15286a201850eb3b1/tensorstore-0.1.76-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:98175dc64935b49467cb7664a431b9a06e9df9b5cab94f9a1fdb24a30b2d69d3", size = 13624514, upload-time = "2025-07-02T21:33:54.109Z" }, + { url = "https://files.pythonhosted.org/packages/94/9f/2b267c520dbbcf0a5ebc7a3c0a6cf852a445e22c8ea8b0f7450bf6b98783/tensorstore-0.1.76-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9e30577f1197ea3573102912482dced95e4c6ff72087ffeb99b5d8b496bf81a", size = 17539304, upload-time = "2025-07-02T21:33:56.172Z" }, + { url = "https://files.pythonhosted.org/packages/1d/9a/9dcc01c8f87047b09602ea16379233b8a308d1d83d5432bf8bc89163ca3e/tensorstore-0.1.76-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20782f833bfa3c59dd3787f657388054c54ee0ab48dad181b360e3e5e81e4c4b", size = 18911982, upload-time = "2025-07-02T21:33:58.928Z" }, + { url = "https://files.pythonhosted.org/packages/10/45/43d387027b3eac9f09de8bb736b1b432de287fbd807716877fe5fbaeee56/tensorstore-0.1.76-cp313-cp313-win_amd64.whl", hash = "sha256:e84fc11b36fcd55cfd1c5dfc60de9d54d7d95c3de074f4d854914067e82a6740", size = 12610851, upload-time = "2025-07-02T21:34:01.505Z" }, ] [[package]] @@ -4826,71 +6209,125 @@ wheels = [ [[package]] name = "tiktoken" -version = "0.9.0" +version = "0.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "regex" }, { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ea/cf/756fedf6981e82897f2d570dd25fa597eb3f4459068ae0572d7e888cfd6f/tiktoken-0.9.0.tar.gz", hash = "sha256:d02a5ca6a938e0490e1ff957bc48c8b078c88cb83977be1625b1fd8aac792c5d", size = 35991, upload-time = "2025-02-14T06:03:01.003Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/86/ad0155a37c4f310935d5ac0b1ccf9bdb635dcb906e0a9a26b616dd55825a/tiktoken-0.11.0.tar.gz", hash = 
"sha256:3c518641aee1c52247c2b97e74d8d07d780092af79d5911a6ab5e79359d9b06a", size = 37648, upload-time = "2025-08-08T23:58:08.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/9e/eceddeffc169fc75fe0fd4f38471309f11cb1906f9b8aa39be4f5817df65/tiktoken-0.11.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fd9e6b23e860973cf9526544e220b223c60badf5b62e80a33509d6d40e6c8f5d", size = 1055199, upload-time = "2025-08-08T23:57:45.076Z" }, + { url = "https://files.pythonhosted.org/packages/4f/cf/5f02bfefffdc6b54e5094d2897bc80efd43050e5b09b576fd85936ee54bf/tiktoken-0.11.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6a76d53cee2da71ee2731c9caa747398762bda19d7f92665e882fef229cb0b5b", size = 996655, upload-time = "2025-08-08T23:57:46.304Z" }, + { url = "https://files.pythonhosted.org/packages/65/8e/c769b45ef379bc360c9978c4f6914c79fd432400a6733a8afc7ed7b0726a/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ef72aab3ea240646e642413cb363b73869fed4e604dcfd69eec63dc54d603e8", size = 1128867, upload-time = "2025-08-08T23:57:47.438Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2d/4d77f6feb9292bfdd23d5813e442b3bba883f42d0ac78ef5fdc56873f756/tiktoken-0.11.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f929255c705efec7a28bf515e29dc74220b2f07544a8c81b8d69e8efc4578bd", size = 1183308, upload-time = "2025-08-08T23:57:48.566Z" }, + { url = "https://files.pythonhosted.org/packages/7a/65/7ff0a65d3bb0fc5a1fb6cc71b03e0f6e71a68c5eea230d1ff1ba3fd6df49/tiktoken-0.11.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61f1d15822e4404953d499fd1dcc62817a12ae9fb1e4898033ec8fe3915fdf8e", size = 1244301, upload-time = "2025-08-08T23:57:49.642Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6e/5b71578799b72e5bdcef206a214c3ce860d999d579a3b56e74a6c8989ee2/tiktoken-0.11.0-cp312-cp312-win_amd64.whl", hash = 
"sha256:45927a71ab6643dfd3ef57d515a5db3d199137adf551f66453be098502838b0f", size = 884282, upload-time = "2025-08-08T23:57:50.759Z" }, + { url = "https://files.pythonhosted.org/packages/cc/cd/a9034bcee638716d9310443818d73c6387a6a96db93cbcb0819b77f5b206/tiktoken-0.11.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a5f3f25ffb152ee7fec78e90a5e5ea5b03b4ea240beed03305615847f7a6ace2", size = 1055339, upload-time = "2025-08-08T23:57:51.802Z" }, + { url = "https://files.pythonhosted.org/packages/f1/91/9922b345f611b4e92581f234e64e9661e1c524875c8eadd513c4b2088472/tiktoken-0.11.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7dc6e9ad16a2a75b4c4be7208055a1f707c9510541d94d9cc31f7fbdc8db41d8", size = 997080, upload-time = "2025-08-08T23:57:53.442Z" }, + { url = "https://files.pythonhosted.org/packages/d0/9d/49cd047c71336bc4b4af460ac213ec1c457da67712bde59b892e84f1859f/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a0517634d67a8a48fd4a4ad73930c3022629a85a217d256a6e9b8b47439d1e4", size = 1128501, upload-time = "2025-08-08T23:57:54.808Z" }, + { url = "https://files.pythonhosted.org/packages/52/d5/a0dcdb40dd2ea357e83cb36258967f0ae96f5dd40c722d6e382ceee6bba9/tiktoken-0.11.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fb4effe60574675118b73c6fbfd3b5868e5d7a1f570d6cc0d18724b09ecf318", size = 1182743, upload-time = "2025-08-08T23:57:56.307Z" }, + { url = "https://files.pythonhosted.org/packages/3b/17/a0fc51aefb66b7b5261ca1314afa83df0106b033f783f9a7bcbe8e741494/tiktoken-0.11.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:94f984c9831fd32688aef4348803b0905d4ae9c432303087bae370dc1381a2b8", size = 1244057, upload-time = "2025-08-08T23:57:57.628Z" }, + { url = "https://files.pythonhosted.org/packages/50/79/bcf350609f3a10f09fe4fc207f132085e497fdd3612f3925ab24d86a0ca0/tiktoken-0.11.0-cp313-cp313-win_amd64.whl", hash = "sha256:2177ffda31dec4023356a441793fed82f7af5291120751dee4d696414f54db0c", size 
= 883901, upload-time = "2025-08-08T23:57:59.359Z" }, +] + +[[package]] +name = "timm" +version = "1.0.16" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/f6/4d7a8c261341fa6ad281920618739f2a650f41043afcedb570f24e99a776/timm-1.0.16.tar.gz", hash = "sha256:a3b8130dd2cb8dc3b9f5e3d09ab6d677a6315a8695fd5264eb6d52a4a46c1044", size = 2339999, upload-time = "2025-06-26T17:09:44.208Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cf/e5/21ff33ecfa2101c1bb0f9b6df750553bd873b7fb532ce2cb276ff40b197f/tiktoken-0.9.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e88f121c1c22b726649ce67c089b90ddda8b9662545a8aeb03cfef15967ddd03", size = 1065073, upload-time = "2025-02-14T06:02:24.768Z" }, - { url = "https://files.pythonhosted.org/packages/8e/03/a95e7b4863ee9ceec1c55983e4cc9558bcfd8f4f80e19c4f8a99642f697d/tiktoken-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a6600660f2f72369acb13a57fb3e212434ed38b045fd8cc6cdd74947b4b5d210", size = 1008075, 
upload-time = "2025-02-14T06:02:26.92Z" }, - { url = "https://files.pythonhosted.org/packages/40/10/1305bb02a561595088235a513ec73e50b32e74364fef4de519da69bc8010/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:95e811743b5dfa74f4b227927ed86cbc57cad4df859cb3b643be797914e41794", size = 1140754, upload-time = "2025-02-14T06:02:28.124Z" }, - { url = "https://files.pythonhosted.org/packages/1b/40/da42522018ca496432ffd02793c3a72a739ac04c3794a4914570c9bb2925/tiktoken-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99376e1370d59bcf6935c933cb9ba64adc29033b7e73f5f7569f3aad86552b22", size = 1196678, upload-time = "2025-02-14T06:02:29.845Z" }, - { url = "https://files.pythonhosted.org/packages/5c/41/1e59dddaae270ba20187ceb8aa52c75b24ffc09f547233991d5fd822838b/tiktoken-0.9.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:badb947c32739fb6ddde173e14885fb3de4d32ab9d8c591cbd013c22b4c31dd2", size = 1259283, upload-time = "2025-02-14T06:02:33.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/64/b16003419a1d7728d0d8c0d56a4c24325e7b10a21a9dd1fc0f7115c02f0a/tiktoken-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:5a62d7a25225bafed786a524c1b9f0910a1128f4232615bf3f8257a73aaa3b16", size = 894897, upload-time = "2025-02-14T06:02:36.265Z" }, - { url = "https://files.pythonhosted.org/packages/7a/11/09d936d37f49f4f494ffe660af44acd2d99eb2429d60a57c71318af214e0/tiktoken-0.9.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2b0e8e05a26eda1249e824156d537015480af7ae222ccb798e5234ae0285dbdb", size = 1064919, upload-time = "2025-02-14T06:02:37.494Z" }, - { url = "https://files.pythonhosted.org/packages/80/0e/f38ba35713edb8d4197ae602e80837d574244ced7fb1b6070b31c29816e0/tiktoken-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:27d457f096f87685195eea0165a1807fae87b97b2161fe8c9b1df5bd74ca6f63", size = 1007877, upload-time = "2025-02-14T06:02:39.516Z" }, - { url = 
"https://files.pythonhosted.org/packages/fe/82/9197f77421e2a01373e27a79dd36efdd99e6b4115746ecc553318ecafbf0/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cf8ded49cddf825390e36dd1ad35cd49589e8161fdcb52aa25f0583e90a3e01", size = 1140095, upload-time = "2025-02-14T06:02:41.791Z" }, - { url = "https://files.pythonhosted.org/packages/f2/bb/4513da71cac187383541facd0291c4572b03ec23c561de5811781bbd988f/tiktoken-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc156cb314119a8bb9748257a2eaebd5cc0753b6cb491d26694ed42fc7cb3139", size = 1195649, upload-time = "2025-02-14T06:02:43Z" }, - { url = "https://files.pythonhosted.org/packages/fa/5c/74e4c137530dd8504e97e3a41729b1103a4ac29036cbfd3250b11fd29451/tiktoken-0.9.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:cd69372e8c9dd761f0ab873112aba55a0e3e506332dd9f7522ca466e817b1b7a", size = 1258465, upload-time = "2025-02-14T06:02:45.046Z" }, - { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" }, + { url = "https://files.pythonhosted.org/packages/3b/14/10d0ea58a7580b8bd7c8d69420b3dc3a1deb890d4ff297deca9717689598/timm-1.0.16-py3-none-any.whl", hash = "sha256:a640e58f4ae41e0445517d1133b34be75bb2bd49cdb830d739925ce1fb7d2526", size = 2485733, upload-time = "2025-06-26T17:09:42.652Z" }, ] [[package]] name = "tokenizers" -version = "0.21.2" +version = "0.21.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ab/2d/b0fce2b8201635f60e8c95990080f58461cc9ca3d5026de2e900f38a7f21/tokenizers-0.21.2.tar.gz", hash = "sha256:fdc7cffde3e2113ba0e6cc7318c40e3438a4d74bbc62bf04bcc63bdfb082ac77", size = 351545, 
upload-time = "2025-06-24T10:24:52.449Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c2/2f/402986d0823f8d7ca139d969af2917fefaa9b947d1fb32f6168c509f2492/tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880", size = 351253, upload-time = "2025-07-28T15:48:54.325Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/cc/2936e2d45ceb130a21d929743f1e9897514691bec123203e10837972296f/tokenizers-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:342b5dfb75009f2255ab8dec0041287260fed5ce00c323eb6bab639066fef8ec", size = 2875206, upload-time = "2025-06-24T10:24:42.755Z" }, - { url = "https://files.pythonhosted.org/packages/6c/e6/33f41f2cc7861faeba8988e7a77601407bf1d9d28fc79c5903f8f77df587/tokenizers-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:126df3205d6f3a93fea80c7a8a266a78c1bd8dd2fe043386bafdd7736a23e45f", size = 2732655, upload-time = "2025-06-24T10:24:41.56Z" }, - { url = "https://files.pythonhosted.org/packages/33/2b/1791eb329c07122a75b01035b1a3aa22ad139f3ce0ece1b059b506d9d9de/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a32cd81be21168bd0d6a0f0962d60177c447a1aa1b1e48fa6ec9fc728ee0b12", size = 3019202, upload-time = "2025-06-24T10:24:31.791Z" }, - { url = "https://files.pythonhosted.org/packages/05/15/fd2d8104faa9f86ac68748e6f7ece0b5eb7983c7efc3a2c197cb98c99030/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8bd8999538c405133c2ab999b83b17c08b7fc1b48c1ada2469964605a709ef91", size = 2934539, upload-time = "2025-06-24T10:24:34.567Z" }, - { url = "https://files.pythonhosted.org/packages/a5/2e/53e8fd053e1f3ffbe579ca5f9546f35ac67cf0039ed357ad7ec57f5f5af0/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5e9944e61239b083a41cf8fc42802f855e1dca0f499196df37a8ce219abac6eb", size = 3248665, upload-time = "2025-06-24T10:24:39.024Z" }, - { url = 
"https://files.pythonhosted.org/packages/00/15/79713359f4037aa8f4d1f06ffca35312ac83629da062670e8830917e2153/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:514cd43045c5d546f01142ff9c79a96ea69e4b5cda09e3027708cb2e6d5762ab", size = 3451305, upload-time = "2025-06-24T10:24:36.133Z" }, - { url = "https://files.pythonhosted.org/packages/38/5f/959f3a8756fc9396aeb704292777b84f02a5c6f25c3fc3ba7530db5feb2c/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b1b9405822527ec1e0f7d8d2fdb287a5730c3a6518189c968254a8441b21faae", size = 3214757, upload-time = "2025-06-24T10:24:37.784Z" }, - { url = "https://files.pythonhosted.org/packages/c5/74/f41a432a0733f61f3d21b288de6dfa78f7acff309c6f0f323b2833e9189f/tokenizers-0.21.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fed9a4d51c395103ad24f8e7eb976811c57fbec2af9f133df471afcd922e5020", size = 3121887, upload-time = "2025-06-24T10:24:40.293Z" }, - { url = "https://files.pythonhosted.org/packages/3c/6a/bc220a11a17e5d07b0dfb3b5c628621d4dcc084bccd27cfaead659963016/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:2c41862df3d873665ec78b6be36fcc30a26e3d4902e9dd8608ed61d49a48bc19", size = 9091965, upload-time = "2025-06-24T10:24:44.431Z" }, - { url = "https://files.pythonhosted.org/packages/6c/bd/ac386d79c4ef20dc6f39c4706640c24823dca7ebb6f703bfe6b5f0292d88/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:ed21dc7e624e4220e21758b2e62893be7101453525e3d23264081c9ef9a6d00d", size = 9053372, upload-time = "2025-06-24T10:24:46.455Z" }, - { url = "https://files.pythonhosted.org/packages/63/7b/5440bf203b2a5358f074408f7f9c42884849cd9972879e10ee6b7a8c3b3d/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:0e73770507e65a0e0e2a1affd6b03c36e3bc4377bd10c9ccf51a82c77c0fe365", size = 9298632, upload-time = "2025-06-24T10:24:48.446Z" }, - { url = 
"https://files.pythonhosted.org/packages/a4/d2/faa1acac3f96a7427866e94ed4289949b2524f0c1878512516567d80563c/tokenizers-0.21.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:106746e8aa9014a12109e58d540ad5465b4c183768ea96c03cbc24c44d329958", size = 9470074, upload-time = "2025-06-24T10:24:50.378Z" }, - { url = "https://files.pythonhosted.org/packages/d8/a5/896e1ef0707212745ae9f37e84c7d50269411aef2e9ccd0de63623feecdf/tokenizers-0.21.2-cp39-abi3-win32.whl", hash = "sha256:cabda5a6d15d620b6dfe711e1af52205266d05b379ea85a8a301b3593c60e962", size = 2330115, upload-time = "2025-06-24T10:24:55.069Z" }, - { url = "https://files.pythonhosted.org/packages/13/c3/cc2755ee10be859c4338c962a35b9a663788c0c0b50c0bdd8078fb6870cf/tokenizers-0.21.2-cp39-abi3-win_amd64.whl", hash = "sha256:58747bb898acdb1007f37a7bbe614346e98dc28708ffb66a3fd50ce169ac6c98", size = 2509918, upload-time = "2025-06-24T10:24:53.71Z" }, + { url = "https://files.pythonhosted.org/packages/98/c6/fdb6f72bf6454f52eb4a2510be7fb0f614e541a2554d6210e370d85efff4/tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133", size = 2863987, upload-time = "2025-07-28T15:48:44.877Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a6/28975479e35ddc751dc1ddc97b9b69bf7fcf074db31548aab37f8116674c/tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60", size = 2732457, upload-time = "2025-07-28T15:48:43.265Z" }, + { url = "https://files.pythonhosted.org/packages/aa/8f/24f39d7b5c726b7b0be95dca04f344df278a3fe3a4deb15a975d194cbb32/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5", size = 3012624, upload-time = "2025-07-28T13:22:43.895Z" }, + { url = 
"https://files.pythonhosted.org/packages/58/47/26358925717687a58cb74d7a508de96649544fad5778f0cd9827398dc499/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6", size = 2939681, upload-time = "2025-07-28T13:22:47.499Z" }, + { url = "https://files.pythonhosted.org/packages/99/6f/cc300fea5db2ab5ddc2c8aea5757a27b89c84469899710c3aeddc1d39801/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9", size = 3247445, upload-time = "2025-07-28T15:48:39.711Z" }, + { url = "https://files.pythonhosted.org/packages/be/bf/98cb4b9c3c4afd8be89cfa6423704337dc20b73eb4180397a6e0d456c334/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732", size = 3428014, upload-time = "2025-07-28T13:22:49.569Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/96c1cc780e6ca7f01a57c13235dd05b7bc1c0f3588512ebe9d1331b5f5ae/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2", size = 3193197, upload-time = "2025-07-28T13:22:51.471Z" }, + { url = "https://files.pythonhosted.org/packages/f2/90/273b6c7ec78af547694eddeea9e05de771278bd20476525ab930cecaf7d8/tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff", size = 3115426, upload-time = "2025-07-28T15:48:41.439Z" }, + { url = "https://files.pythonhosted.org/packages/91/43/c640d5a07e95f1cf9d2c92501f20a25f179ac53a4f71e1489a3dcfcc67ee/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2", size = 9089127, upload-time = "2025-07-28T15:48:46.472Z" }, + { url = 
"https://files.pythonhosted.org/packages/44/a1/dd23edd6271d4dca788e5200a807b49ec3e6987815cd9d0a07ad9c96c7c2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78", size = 9055243, upload-time = "2025-07-28T15:48:48.539Z" }, + { url = "https://files.pythonhosted.org/packages/21/2b/b410d6e9021c4b7ddb57248304dc817c4d4970b73b6ee343674914701197/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b", size = 9298237, upload-time = "2025-07-28T15:48:50.443Z" }, + { url = "https://files.pythonhosted.org/packages/b7/0a/42348c995c67e2e6e5c89ffb9cfd68507cbaeb84ff39c49ee6e0a6dd0fd2/tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24", size = 9461980, upload-time = "2025-07-28T15:48:52.325Z" }, + { url = "https://files.pythonhosted.org/packages/3d/d3/dacccd834404cd71b5c334882f3ba40331ad2120e69ded32cf5fda9a7436/tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0", size = 2329871, upload-time = "2025-07-28T15:48:56.841Z" }, + { url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" }, ] [[package]] -name = "toml" -version = "0.10.2" +name = "tomlkit" +version = "0.13.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/cc/18/0bbf3884e9eaa38819ebe46a7bd25dcd56b67434402b66a58c4b8e552575/tomlkit-0.13.3.tar.gz", hash = "sha256:430cf247ee57df2b94ee3fbe588e71d362a941ebb545dec29b53961d61add2a1", size = 185207, upload-time = "2025-06-05T07:13:44.947Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, + { url = "https://files.pythonhosted.org/packages/bd/75/8539d011f6be8e29f339c42e633aae3cb73bffa95dd0f9adec09b9c58e85/tomlkit-0.13.3-py3-none-any.whl", hash = "sha256:c89c649d79ee40629a9fda55f8ace8c6a1b42deb912b2a8fd8d942ddadb606b0", size = 38901, upload-time = "2025-06-05T07:13:43.546Z" }, ] [[package]] name = "torch" -version = "2.7.0+cu128" -source = { registry = "https://download.pytorch.org/whl/cu128" } +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and sys_platform == 'darwin'", +] dependencies = [ - { name = "filelock" }, - { name = "fsspec" }, - { name = "jinja2" }, - { name = "networkx" }, + { name = "filelock", marker = "sys_platform == 'darwin'" }, + { name = "fsspec", marker = "sys_platform == 'darwin'" }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "networkx", marker = "sys_platform == 'darwin'" }, + { name = "setuptools", marker = "sys_platform == 'darwin'" }, + { name = "sympy", marker = "sys_platform == 'darwin'" }, + { name = "typing-extensions", marker = "sys_platform == 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/66/5c9a321b325aaecb92d4d1855421e3a055abd77903b7dab6575ca07796db/torch-2.8.0-cp312-none-macosx_11_0_arm64.whl", hash = 
"sha256:619c2869db3ada2c0105487ba21b5008defcc472d23f8b80ed91ac4a380283b0", size = 73630478, upload-time = "2025-08-06T14:53:57.144Z" }, + { url = "https://files.pythonhosted.org/packages/de/69/8b7b13bba430f5e21d77708b616f767683629fc4f8037564a177d20f90ed/torch-2.8.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:1a62a1ec4b0498930e2543535cf70b1bef8c777713de7ceb84cd79115f553767", size = 73915128, upload-time = "2025-08-06T14:54:34.769Z" }, + { url = "https://files.pythonhosted.org/packages/04/6e/650bb7f28f771af0cb791b02348db8b7f5f64f40f6829ee82aa6ce99aabe/torch-2.8.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:7b677e17f5a3e69fdef7eb3b9da72622f8d322692930297e4ccb52fefc6c8211", size = 73632395, upload-time = "2025-08-06T14:55:28.645Z" }, +] + +[[package]] +name = "torch" +version = "2.8.0+cu129" +source = { registry = "https://download.pytorch.org/whl/cu129" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine == 'aarch64' 
and sys_platform == 'linux'", +] +dependencies = [ + { name = "filelock", marker = "sys_platform != 'darwin'" }, + { name = "fsspec", marker = "sys_platform != 'darwin'" }, + { name = "jinja2", marker = "sys_platform != 'darwin'" }, + { name = "networkx", marker = "sys_platform != 'darwin'" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -4905,43 +6342,64 @@ dependencies = [ { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools" }, - { name = "sympy" }, - { name = "triton", marker = "sys_platform == 'linux'" }, - { name = "typing-extensions" }, + { name = "setuptools", marker = "sys_platform != 'darwin'" }, + { name = "sympy", marker = "sys_platform != 'darwin'" }, + { name = "triton", version = "3.4.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform == 'linux'" }, + { name = "typing-extensions", marker = "sys_platform != 'darwin'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6bba7dca5d9a729f1e8e9befb98055498e551efaf5ed034824c168b560afc1ac" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7c0f08d1c44a02abad389373dddfce75904b969a410be2f4e5109483dd3dc0ce" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = 
"sha256:1704e5dd66c9221e4e8b6ae2d80cbf54e129571e643f5fa9ca78cc6d2096403a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:633f35e8b1b1f640ef5f8a98dbd84f19b548222ce7ba8f017fe47ce6badc106a" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d2f69f909da5dc52113ec66a851d62079f3d52c83184cf64beebdf12ca2f705c" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:58c749f52ddc9098155c77d6c74153bb13d8978fd6e1063b5d7b41d4644f5af5" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fa05ac6ebed4777de7a5eff398c1f17b697c02422516748ce66a8151873e5a0e" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:78e13c26c38ae92d6841cf9ce760d7e9d52bca3e3183de371812e84274b054dc" }, - { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:3559e98be824c2b12ab807319cd61c6174d73a524c9961317de8e8a44133c5c5" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:692fe6e513b667f789a543fa9b1baba58e77a46d5c8629764ca0c00a56823e1f" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:02c7258e917f3043c978b53acf6f02b818db0d0d85db0e58ae578af333b9b4e2" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp312-cp312-win_amd64.whl", hash = "sha256:2bc729898e422b9f3da54349eed98f2f0b5dd415434508ee2ab2a13fb021815d" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ad2d64316635e7ab06f6c973a252526d59a92a2045825c102f876914a72304d0" }, + { url = 
"https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:563740167be2189b71530b503f0c8a8d7a8267dd49d4de6f9c5f1d23fbe237df" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313-win_amd64.whl", hash = "sha256:2cef066f9759ff4d7868a8c3695aa60d9a878598acb3685bb1ef2fdac29dcd68" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2982bf34249cbb38f1090e71ad7097a214a21023ccdc0413961986ab7d0396e6" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6344260959ebcfa6dae458e1c4365195bcfdf00f4f1f1ad438cbaf50756829ed" }, + { url = "https://download.pytorch.org/whl/cu129/torch-2.8.0%2Bcu129-cp313-cp313t-win_amd64.whl", hash = "sha256:9c0cd89e54ce44ce3208c5cf4163773b9cda0067e4b48cfcac56a4e04af52040" }, +] + +[[package]] +name = "torchao" +version = "0.12.0" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/5e/f5df5e0bbc7d84e5da3b4599b5bad58f4a6657b22bcae64dd741faee80ab/torchao-0.12.0-cp39-abi3-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:326ea2800cc7d9d50f0d17742ad923e5c6d4c4dd5942558f4ed13db00bdebc7c", size = 6777967, upload-time = "2025-07-17T17:50:13.567Z" }, + { url = "https://files.pythonhosted.org/packages/6c/5f/6bf9b5bed6d31e286516d23e1db7320d2ccfbf1b2234749833ad1e3d25a5/torchao-0.12.0-py3-none-any.whl", hash = "sha256:103f2a9164d2e4f705332af1aafbb8473eadd14d9164e45857ca187cde1f13d2", size = 962232, upload-time = "2025-07-17T17:50:15.119Z" }, ] [[package]] name = "torchaudio" -version = "2.7.0" +version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", 
version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/cc/c2e2a3eb6ee956f73c68541e439916f8146170ea9cc61e72adea5c995312/torchaudio-2.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ddef94bf181e6447cbb05f38beaca8f6c5bb8d2b9ddced1aa3452025b9fc70d3", size = 1856736, upload-time = "2025-08-06T14:58:36.3Z" }, + { url = "https://files.pythonhosted.org/packages/c7/0d/24dad878784f1edd62862f27173781669f0c71eb46368636787d1e364188/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:862e2e40bf09d865e5df080a84c1a39bbcef40e43140f4b1737eb3a389d3b38f", size = 1692930, upload-time = "2025-08-06T14:58:41.312Z" }, + { url = "https://files.pythonhosted.org/packages/c2/a6/84d80f34472503e9eb82245d7df501c59602d75d7360e717fb9b84f91c5e/torchaudio-2.8.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:93a8583f280fe83ba021aa713319381ea71362cc87b67ee38e97a43cb2254aee", size = 4014607, upload-time = "2025-08-06T14:58:47.234Z" }, + { url = "https://files.pythonhosted.org/packages/43/ab/96ad33afa320738a7cfb4b51ba97e2f3cfb1e04ae3115d5057655103ba4f/torchaudio-2.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:4b82cacd1b8ccd543b1149d8cab257a40dfda8119023d2e3a96c66349c84bffb", size = 2499890, upload-time = "2025-08-06T14:58:55.066Z" }, + { url = "https://files.pythonhosted.org/packages/3b/ea/2a68259c4dbb5fe44ebfdcfa40b115010d8c677221a7ef0f5577f3c4f5f1/torchaudio-2.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f851d32e94ca05e470f0c60e25726ec1e0eb71cb2ca5a0206b7fd03272ccc3c8", size = 1857045, upload-time = "2025-08-06T14:58:51.984Z" }, + { url = "https://files.pythonhosted.org/packages/0d/a3/1c79a8ef29fe403b83bdfc033db852bc2a888b80c406325e5c6fb37a7f2d/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:09535a9b727c0793cd07c1ace99f3f353626281bcc3e30c2f2314e3ebc9d3f96", size = 1692755, upload-time = 
"2025-08-06T14:58:50.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/df/61941198e9ac6bcebfdd57e1836e4f3c23409308e3d8d7458f0198a6a366/torchaudio-2.8.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d2a85b124494736241884372fe1c6dd8c15e9bc1931bd325838c5c00238c7378", size = 4013897, upload-time = "2025-08-06T14:59:01.66Z" }, + { url = "https://files.pythonhosted.org/packages/c3/ab/7175d35a4bbc4a465a9f1388571842f16eb6dec5069d7ea9c8c2d7b5b401/torchaudio-2.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:c1b5139c840367a7855a062a06688a416619f6fd2ca46d9b9299b49a7d133dfd", size = 2500085, upload-time = "2025-08-06T14:58:44.95Z" }, + { url = "https://files.pythonhosted.org/packages/34/1a/69b9f8349d9d57953d5e7e445075cbf74000173fb5f5d5d9e9d59415fc63/torchaudio-2.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:68df9c9068984edff8065c2b6656725e6114fe89281b0cf122c7505305fc98a4", size = 1935600, upload-time = "2025-08-06T14:58:46.051Z" }, + { url = "https://files.pythonhosted.org/packages/71/76/40fec21b65bccfdc5c8cdb9d511033ab07a7ad4b05f0a5b07f85c68279fc/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:1951f10ed092f2dda57634f6a3950ef21c9d9352551aa84a9fccd51bbda18095", size = 1704199, upload-time = "2025-08-06T14:58:43.594Z" }, + { url = "https://files.pythonhosted.org/packages/8e/53/95c3363413c2f2009f805144160b093a385f641224465fbcd717449c71fb/torchaudio-2.8.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4f7d97494698d98854129349b12061e8c3398d33bd84c929fa9aed5fd1389f73", size = 4020596, upload-time = "2025-08-06T14:59:03.031Z" }, + { url = "https://files.pythonhosted.org/packages/52/27/7fc2d7435af044ffbe0b9b8e98d99eac096d43f128a5cde23c04825d5dcf/torchaudio-2.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:d4a715d09ac28c920d031ee1e60ecbc91e8a5079ad8c61c0277e658436c821a6", size = 2549553, upload-time = "2025-08-06T14:59:00.019Z" }, ] + +[[package]] +name = "torchcodec" +version = "0.6.0" +source = { registry = 
"https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/b9/66dd7c4e16e8e6dcc52b4702ba7bbace589972b3597627d39d9dc3aa5fdd/torchaudio-2.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:65b4fc9b7f28367f918b02ae4db4290457bc4fdd160f22b7d684e93ab8dcb956", size = 1846733, upload-time = "2025-04-23T14:47:01.068Z" }, - { url = "https://files.pythonhosted.org/packages/47/48/850edf788c674494a7e148eee6f5563cae34c9a3e3e0962dcfce66c1dae7/torchaudio-2.7.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:33004ed47f18f00044c97ee8cd9e3f5e1c2e26ef23d4f72b5f1ae33e6182587b", size = 1686687, upload-time = "2025-04-23T14:47:02.136Z" }, - { url = "https://files.pythonhosted.org/packages/78/98/ec8c7aba67b44cdc59717d4b43d02023ded5da180d33c6469d20bf5bfa3c/torchaudio-2.7.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a6f03494075bcdd62e7fade7baf50a0ef107aa809d02b5e1786391adced451a3", size = 3454437, upload-time = "2025-04-23T14:46:57.557Z" }, - { url = "https://files.pythonhosted.org/packages/5e/23/b73163ac06e5a724375df61a5b6c853861a825fe98e64388f277514153dd/torchaudio-2.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:275931c8a38ff84b5692df990506b41f18d0a0706574d96bc8456ad9e5fa85c8", size = 2493451, upload-time = "2025-04-23T14:46:46.456Z" }, - { url = "https://files.pythonhosted.org/packages/c1/a5/bc4bb6b254d3d77e9fa4d219f29d3bff8db92acc9004c27e875f32d4724a/torchaudio-2.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:150fbde41da60296effed772b7a170f563cd44967555abb0603fc573f39ce245", size = 1847033, upload-time = "2025-04-23T14:46:58.774Z" }, - { url = "https://files.pythonhosted.org/packages/96/af/4c8d4e781ea5924590cccf8595a09081eb07a577c03fbf4bf04a2f5f7134/torchaudio-2.7.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:9d921eeb036512a87efde007977b27bd326320cd7cd5f43195824173fe82e888", size = 1686308, upload-time = "2025-04-23T14:46:56.378Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/02/ad1083f6ce534989c704c3efcd615bdd160934229882aa0a3ea95cd24a9a/torchaudio-2.7.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:30675a5f99551e036974a7476729eb5d31f453cf792ae6e0a0d449960f84f464", size = 3455266, upload-time = "2025-04-23T14:46:50.327Z" }, - { url = "https://files.pythonhosted.org/packages/88/49/923ebb2603156dd5c5ae6d845bf51a078e05f27432cd26f13ecdcc8713cd/torchaudio-2.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:ce8cfc07a4e59c835404583e7d3e171208b332b61bb92643f8723f6f192da8bf", size = 2493639, upload-time = "2025-04-23T14:46:40.909Z" }, - { url = "https://files.pythonhosted.org/packages/bf/85/dd4cd1202483e85c208e1ca3d31cc42c2972f1d955d11b742fa098a38a1b/torchaudio-2.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9e08138cac75cde2064c8b5bbd12f27bdeb3d36f4b8c2285fc9c42eaa97c0676", size = 1929989, upload-time = "2025-04-23T14:46:54.144Z" }, - { url = "https://files.pythonhosted.org/packages/ef/3a/8a1045f2b00c6300827c1e6a3e661e9d219b5406ef103dc2824604548b8c/torchaudio-2.7.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:1d928aeff495a0807b4da3b0dd46e15eae8070da5e7ed6d35c1dcfd9fdfe2b74", size = 1700439, upload-time = "2025-04-23T14:46:55.249Z" }, - { url = "https://files.pythonhosted.org/packages/72/53/21d589a5a41702b5d37bae224286986cb707500d5ecdbfdcfdbac9381a08/torchaudio-2.7.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ee4add33f24e9cb959bd9de89f36de5ebf844eda040d1d0b38f08617d67dedc3", size = 3466356, upload-time = "2025-04-23T14:46:49.131Z" }, - { url = "https://files.pythonhosted.org/packages/00/0b/5ef81aaacce5e9c316659ddc61a2b1e4f984a504d4a06fe61bab04cc75f1/torchaudio-2.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:725dbbcc9e744ca62de8856262c6f472ca26b1cd5db062b062a2d6b66a336cc0", size = 2544970, upload-time = "2025-04-23T14:46:44.837Z" }, + { url = 
"https://files.pythonhosted.org/packages/d9/b3/11326a0e7a3c803a95975cfce4ac88fa4ea1a0d432bb876081046c5a5554/torchcodec-0.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fba260145a239b5afe13336e3a5bc1b089c9c31a073e9a7c2026d4cbd853fdd9", size = 3482584, upload-time = "2025-08-07T08:51:32.535Z" }, + { url = "https://files.pythonhosted.org/packages/a7/d1/3f90561df013f6a015ef19de22726b64073fee405f53d3c4b8255ab05a67/torchcodec-0.6.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:fdef91a17fb1f1a159ce23710324a9a4e6d6a885275de73700f94a9ad562c6b2", size = 1370954, upload-time = "2025-08-07T08:51:15.021Z" }, + { url = "https://files.pythonhosted.org/packages/87/d0/0b5dd42652e4527d578e1d6239dbb907bf83e502115e517b83a55d8b7f8b/torchcodec-0.6.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:de20cab5df7fa7cdd74ec1dc0d508324685573f86de6789f0ebb860b7ea20b33", size = 3446017, upload-time = "2025-08-07T08:51:34.484Z" }, + { url = "https://files.pythonhosted.org/packages/97/62/a938334e39101d4304619b90847d8aef7d1c607c6bcf33638f72931ae990/torchcodec-0.6.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:46dab701a2d809e975a8b07d7ee47ed34f1d903511e374c74cfc1de6a5ab0e3f", size = 1374794, upload-time = "2025-08-07T08:51:17.355Z" }, ] [[package]] @@ -4950,7 +6408,8 @@ version = "0.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, { name = "urllib3" }, ] wheels = [ @@ -4958,76 +6417,84 @@ wheels = [ ] [[package]] -name = "torchmetrics" -version = "1.7.3" +name = "torchprofile" +version = "0.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "lightning-utilities" }, - { name = "numpy" }, - { name = 
"packaging" }, - { name = "torch" }, + { name = "numpy", marker = "sys_platform != 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/48/22/8b16c4ec34d93ee15024924cbbe84fbd235bb3e1df2cc8f48c865c1528e7/torchmetrics-1.7.3.tar.gz", hash = "sha256:08450a19cdb67ba1608aac0b213e5dc73033e11b60ad4719696ebcede591621e", size = 566545, upload-time = "2025-06-13T15:39:37.498Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6f/f2/bed7da46003c26ed44fc7fa3ecc98a84216f0d4758e5e6a3693754d490d9/torchmetrics-1.7.3-py3-none-any.whl", hash = "sha256:7b6fd43e92f0a1071c8bcb029637f252b0630699140a93ed8817ce7afe9db34e", size = 962639, upload-time = "2025-06-13T15:39:35.69Z" }, + { url = "https://files.pythonhosted.org/packages/62/15/71ad4ed163b03cba1315f1d96e0bc8e39d5a97f92974ffa610a729b273ab/torchprofile-0.0.4-py3-none-any.whl", hash = "sha256:7151fe88dc770f0eeec241244a4c7feaec2c5e8c7852386bc2d6a8d7dde7384d", size = 7694, upload-time = "2021-06-22T04:58:02.485Z" }, ] [[package]] -name = "torchprofile" -version = "0.0.4" -source = { registry = "https://pypi.org/simple" } +name = "torchvision" 
+version = "0.23.0" +source = { registry = "https://download.pytorch.org/whl/cu129" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "numpy", marker = "sys_platform != 'darwin'" }, - { name = "torch", marker = "sys_platform != 'darwin'" }, - { name = "torchvision", marker = "sys_platform != 'darwin'" }, + { name = "numpy", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "pillow", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/62/15/71ad4ed163b03cba1315f1d96e0bc8e39d5a97f92974ffa610a729b273ab/torchprofile-0.0.4-py3-none-any.whl", hash = "sha256:7151fe88dc770f0eeec241244a4c7feaec2c5e8c7852386bc2d6a8d7dde7384d", size = 7694, upload-time = "2021-06-22T04:58:02.485Z" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:630f602db2c594c9cbc89b964d5fb4873adf4193805df65339b24cd3f4cf57f7" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:20f7e25a24f91d93d09398b80929dec805c4ee2f5527fad8eecd6e43dc5fd5d0" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = 
"sha256:cb70cc000e6a398270044c3406a89ee8ab6157a4e81b5d40c5904e1d0e22e2f8" }, ] [[package]] name = "torchvision" -version = "0.22.0" +version = "0.23.0" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and sys_platform == 'darwin'", +] dependencies = [ - { name = "numpy" }, - { name = "pillow" }, - { name = "torch" }, + { name = "numpy", marker = "sys_platform == 'darwin'" }, + { name = "pillow", marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/cb/ea/887d1d61cf4431a46280972de665f350af1898ce5006cd046326e5d0a2f2/torchvision-0.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31c3165418fe21c3d81fe3459e51077c2f948801b8933ed18169f54652796a0f", size = 1947826, upload-time = "2025-04-23T14:41:59.188Z" }, - { url = "https://files.pythonhosted.org/packages/72/ef/21f8b6122e13ae045b8e49658029c695fd774cd21083b3fa5c3f9c5d3e35/torchvision-0.22.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8f116bc82e0c076e70ba7776e611ed392b9666aa443662e687808b08993d26af", size = 2514571, upload-time = "2025-04-23T14:41:53.458Z" }, - { url = "https://files.pythonhosted.org/packages/7c/48/5f7617f6c60d135f86277c53f9d5682dfa4e66f4697f505f1530e8b69fb1/torchvision-0.22.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ce4dc334ebd508de2c534817c9388e928bc2500cf981906ae8d6e2ca3bf4727a", size = 7446522, upload-time = "2025-04-23T14:41:34.9Z" }, - { url = "https://files.pythonhosted.org/packages/99/94/a015e93955f5d3a68689cc7c385a3cfcd2d62b84655d18b61f32fb04eb67/torchvision-0.22.0-cp312-cp312-win_amd64.whl", hash = "sha256:24b8c9255c209ca419cc7174906da2791c8b557b75c23496663ec7d73b55bebf", size = 1716664, upload-time = "2025-04-23T14:41:58.019Z" }, - { url = 
"https://files.pythonhosted.org/packages/e1/2a/9b34685599dcb341d12fc2730055155623db7a619d2415a8d31f17050952/torchvision-0.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ece17995857dd328485c9c027c0b20ffc52db232e30c84ff6c95ab77201112c5", size = 1947823, upload-time = "2025-04-23T14:41:39.956Z" }, - { url = "https://files.pythonhosted.org/packages/77/77/88f64879483d66daf84f1d1c4d5c31ebb08e640411139042a258d5f7dbfe/torchvision-0.22.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:471c6dd75bb984c6ebe4f60322894a290bf3d4b195e769d80754f3689cd7f238", size = 2471592, upload-time = "2025-04-23T14:41:54.991Z" }, - { url = "https://files.pythonhosted.org/packages/f7/82/2f813eaae7c1fae1f9d9e7829578f5a91f39ef48d6c1c588a8900533dd3d/torchvision-0.22.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:2b839ac0610a38f56bef115ee5b9eaca5f9c2da3c3569a68cc62dbcc179c157f", size = 7446333, upload-time = "2025-04-23T14:41:36.603Z" }, - { url = "https://files.pythonhosted.org/packages/58/19/ca7a4f8907a56351dfe6ae0a708f4e6b3569b5c61d282e3e7f61cf42a4ce/torchvision-0.22.0-cp313-cp313-win_amd64.whl", hash = "sha256:4ada1c08b2f761443cd65b7c7b4aec9e2fc28f75b0d4e1b1ebc9d3953ebccc4d", size = 1716693, upload-time = "2025-04-23T14:41:41.031Z" }, - { url = "https://files.pythonhosted.org/packages/6f/a7/f43e9c8d13118b4ffbaebea664c9338ab20fa115a908125afd2238ff16e7/torchvision-0.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cdc96daa4658b47ce9384154c86ed1e70cba9d972a19f5de6e33f8f94a626790", size = 2137621, upload-time = "2025-04-23T14:41:51.427Z" }, - { url = "https://files.pythonhosted.org/packages/6a/9a/2b59f5758ba7e3f23bc84e16947493bbce97392ec6d18efba7bdf0a3b10e/torchvision-0.22.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:753d3c84eeadd5979a33b3b73a25ecd0aa4af44d6b45ed2c70d44f5e0ac68312", size = 2476555, upload-time = "2025-04-23T14:41:38.357Z" }, - { url = 
"https://files.pythonhosted.org/packages/7d/40/a7bc2ab9b1e56d10a7fd9ae83191bb425fa308caa23d148f1c568006e02c/torchvision-0.22.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:b30e3ed29e4a61f7499bca50f57d8ebd23dfc52b14608efa17a534a55ee59a03", size = 7617924, upload-time = "2025-04-23T14:41:42.709Z" }, - { url = "https://files.pythonhosted.org/packages/c1/7b/30d423bdb2546250d719d7821aaf9058cc093d165565b245b159c788a9dd/torchvision-0.22.0-cp313-cp313t-win_amd64.whl", hash = "sha256:e5d680162694fac4c8a374954e261ddfb4eb0ce103287b0f693e4e9c579ef957", size = 1638621, upload-time = "2025-04-23T14:41:46.06Z" }, + { url = "https://files.pythonhosted.org/packages/df/1d/0ea0b34bde92a86d42620f29baa6dcbb5c2fc85990316df5cb8f7abb8ea2/torchvision-0.23.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e0e2c04a91403e8dd3af9756c6a024a1d9c0ed9c0d592a8314ded8f4fe30d440", size = 1856885, upload-time = "2025-08-06T14:58:06.503Z" }, + { url = "https://files.pythonhosted.org/packages/91/37/45a5b9407a7900f71d61b2b2f62db4b7c632debca397f205fdcacb502780/torchvision-0.23.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1c37e325e09a184b730c3ef51424f383ec5745378dc0eca244520aca29722600", size = 1856886, upload-time = "2025-08-06T14:58:05.491Z" }, + { url = "https://files.pythonhosted.org/packages/05/35/72f91ad9ac7c19a849dedf083d347dc1123f0adeb401f53974f84f1d04c8/torchvision-0.23.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:2df618e1143805a7673aaf82cb5720dd9112d4e771983156aaf2ffff692eebf9", size = 2047192, upload-time = "2025-08-06T14:58:11.813Z" }, ] [[package]] -name = "torchx" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } +name = "torchvision" +version = "0.23.0+cu129" +source = { registry = "https://download.pytorch.org/whl/cu129" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + 
"python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", +] dependencies = [ - { name = "docker" }, - { name = "docstring-parser" }, - { name = "filelock" }, - { name = "fsspec" }, - { name = "importlib-metadata" }, - { name = "pyre-extensions" }, - { name = "pyyaml" }, - { name = "tabulate" }, - { name = "urllib3" }, + { name = "numpy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/ed/a9/e07e64222e59f44eb18b333bc8db942a295c0d9c16f04c8703fe106a8fd5/torchx-0.7.0.tar.gz", hash = "sha256:933b800849c69ddff9feda931f0ae2c4083638eafc73abfdf361158667c68ad6", size = 196142, upload-time = "2024-07-16T22:06:57.222Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/8d/282b884cef8f26b240e1ac8e60513a25a68e3463dc1c68771191a2c8a209/torchx-0.7.0-py3-none-any.whl", 
hash = "sha256:815c2628c30de1f5938c14c4427cfe280fdf93473b171be906ab7c1ea5971824", size = 256100, upload-time = "2024-07-16T21:16:01.849Z" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:6226be1b8399ef655a11965ea4975250f7823fc9b200b35deb9eeac350c667a9" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp312-cp312-win_amd64.whl", hash = "sha256:57cf57ada9a5407755e170a4ab3842337b83862c93f9483decaf0b6b4d69fa09" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:04316e24ddd1cee3b301208811a9d7c4cfca5f566ea367f33bda059d8f0e012e" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp313-cp313-win_amd64.whl", hash = "sha256:a486a0cee466807a17749d0b916d52088343453dc911baa20f0f459b2fa43c9a" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:c718f6d2c0e61feed39763925eea3e1f42979f6b21e61276f487409168d9e352" }, + { url = "https://download.pytorch.org/whl/cu129/torchvision-0.23.0%2Bcu129-cp313-cp313t-win_amd64.whl", hash = "sha256:8218c1f614972abb4710afde96d0f70b174b235f390e165e6fd4cdd5cee6d93d" }, ] [[package]] @@ -5044,13 +6511,13 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.3.0" +version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "transformer-engine-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/5a/60b25ecc3396fc73b07bf9eb2fbf6a3e186efe1e3e86070017ba9d34a4a0/transformer_engine-2.3.0-py3-none-any.whl", hash = "sha256:2a02c2ac29b9781d09975d4cc373df66ed5f0326ff079a2908c3c26d35ac9ee9", size = 486192, upload-time = "2025-05-19T22:08:25.342Z" }, + { url = 
"https://files.pythonhosted.org/packages/72/be/a7cf5f28b7abbe966956217b18208fc34cc9bfaf62fa9472c0603db74899/transformer_engine-2.8.0-py3-none-any.whl", hash = "sha256:795b056d31b0f67f5d7432725177782dd5084090c1e3c52532577070947fe9a7", size = 638319, upload-time = "2025-10-07T04:55:34.115Z" }, ] [package.optional-dependencies] @@ -5060,7 +6527,7 @@ pytorch = [ [[package]] name = "transformer-engine-cu12" -version = "2.3.0" +version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, @@ -5068,22 +6535,26 @@ dependencies = [ { name = "pydantic" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/72/6a/b94e7bb1c61db83beb89f5500cffb6205ea3c27a343bd29b35e3e5e55bbf/transformer_engine_cu12-2.3.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3b3632ef9e4ea94ba2a373e45aa98c2ff4aaa929e5418610d1397c59d0fbc3b8", size = 266059296, upload-time = "2025-05-20T00:53:27.635Z" }, - { url = "https://files.pythonhosted.org/packages/95/30/b4c0741fbe0402aa60f24db81ee50ee213cf068f1b79e2f447543e95f194/transformer_engine_cu12-2.3.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:f4b48957de2318b7009c54bb9442b717a383f6b254a9fc26fac0c67d2d658d52", size = 266398998, upload-time = "2025-05-20T00:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/53/db/cde3e772cf5cd7e941b64d37e4a61e2762f36ecc2e6508525af536076f8d/transformer_engine_cu12-2.8.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3a92c781fc3c1a3a6a0009871a36903fa364b2d51ce06b06641d29aeefd59310", size = 480373707, upload-time = "2025-10-07T05:03:05.392Z" }, + { url = "https://files.pythonhosted.org/packages/b9/14/67860f2f1f9d0eca4a8e5e0cef5a0de5c4fc26340625051f032d16913d8c/transformer_engine_cu12-2.8.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:8ee5f9df586491a35fd1a01cb95b6970a9f01a5e8f935ecdacd56173c44d6a67", size = 480875025, upload-time = "2025-10-07T04:54:43.762Z" }, ] [[package]] name = "transformer-engine-torch" -version = 
"2.3.0" +version = "2.8.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "torch" }, + { name = "einops" }, + { name = "onnx" }, + { name = "onnxscript" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/1b/8f/d8b39a34cebb91528f118591e622d9ba1d9e52e4ea40cca1252ec335d07c/transformer_engine_torch-2.3.0.tar.gz", hash = "sha256:2b0adb75d7a5bf590cc5452276db6badb0963de99e6c50c7fde6a3a8a7c504e9", size = 165832, upload-time = "2025-05-15T23:10:38.909Z" } +sdist = { url = "https://files.pythonhosted.org/packages/38/63/1e3953244ed4f318f87889309a56cdd664759f007967eb850ee415a5584d/transformer_engine_torch-2.8.0.tar.gz", hash = "sha256:ce09f1bd9b8e532a5c347b9e9b3a3a771722095daddca673ae82ccce8e68d759", size = 209805, upload-time = "2025-10-07T04:54:11.134Z" } [[package]] name = "transformers" -version = "4.52.4" +version = "4.55.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -5097,39 +6568,61 @@ dependencies = [ { name = "tokenizers" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/da/a9/275037087f9d846580b02f2d7cae0e0a6955d46f84583d0151d6227bd416/transformers-4.52.4.tar.gz", hash = "sha256:aff3764441c1adc192a08dba49740d3cbbcb72d850586075aed6bd89b98203e6", size = 8945376, upload-time = "2025-05-30T09:17:17.947Z" } +sdist = { url = "https://files.pythonhosted.org/packages/2b/43/3cb831d5f28cc723516e5bb43a8c6042aca3038bb36b6bd6016b40dfd1e8/transformers-4.55.4.tar.gz", hash = "sha256:574a30559bc273c7a4585599ff28ab6b676e96dc56ffd2025ecfce2fd0ab915d", size = 9573015, upload-time = "2025-08-22T15:18:43.192Z" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/96/f2/25b27b396af03d5b64e61976b14f7209e2939e9e806c10749b6d277c273e/transformers-4.52.4-py3-none-any.whl", hash = "sha256:203f5c19416d5877e36e88633943761719538a25d9775977a24fe77a1e5adfc7", size = 10460375, upload-time = "2025-05-30T09:17:14.477Z" }, + { url = "https://files.pythonhosted.org/packages/fa/0a/8791a6ee0529c45f669566969e99b75e2ab20eb0bfee8794ce295c18bdad/transformers-4.55.4-py3-none-any.whl", hash = "sha256:df28f3849665faba4af5106f0db4510323277c4bb595055340544f7e59d06458", size = 11269659, upload-time = "2025-08-22T15:18:40.025Z" }, ] [[package]] name = "triton" -version = "3.3.0" -source = { registry = "https://download.pytorch.org/whl/cu128" } +version = "3.4.0" +source = { registry = "https://download.pytorch.org/whl/cu129" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "setuptools" }, + { name = "setuptools", marker = "sys_platform == 'linux'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/triton-3.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/triton-3.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/triton-3.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = "https://download.pytorch.org/whl/triton-3.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/triton-3.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, - { url = 
"https://download.pytorch.org/whl/triton-3.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl" }, + { url = "https://download.pytorch.org/whl/triton-3.4.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl" }, +] + +[[package]] +name = "triton" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", +] +dependencies = [ + { name = "setuptools", marker = "sys_platform != 'darwin' and sys_platform != 'linux'" }, ] [[package]] name = "trove-classifiers" -version = "2025.5.9.12" +version = "2025.8.6.13" source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/38/04/1cd43f72c241fedcf0d9a18d0783953ee301eac9e5d9db1df0f0f089d9af/trove_classifiers-2025.5.9.12.tar.gz", hash = "sha256:7ca7c8a7a76e2cd314468c677c69d12cc2357711fcab4a60f87994c1589e5cb5", size = 16940, upload-time = "2025-05-09T12:04:48.829Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/21/707af14daa638b0df15b5d5700349e0abdd3e5140069f9ab6e0ccb922806/trove_classifiers-2025.8.6.13.tar.gz", hash = "sha256:5a0abad839d2ed810f213ab133d555d267124ddea29f1d8a50d6eca12a50ae6e", size = 16932, upload-time = "2025-08-06T13:26:26.479Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/92/ef/c6deb083748be3bcad6f471b6ae983950c161890bf5ae1b2af80cc56c530/trove_classifiers-2025.5.9.12-py3-none-any.whl", hash = "sha256:e381c05537adac78881c8fa345fd0e9970159f4e4a04fcc42cfd3129cca640ce", size = 14119, upload-time = "2025-05-09T12:04:46.38Z" }, + { url = "https://files.pythonhosted.org/packages/d5/44/323a87d78f04d5329092aada803af3612dd004a64b69ba8b13046601a8c9/trove_classifiers-2025.8.6.13-py3-none-any.whl", hash = "sha256:c4e7fc83012770d80b3ae95816111c32b085716374dccee0d3fbf5c235495f9f", size = 14121, upload-time = "2025-08-06T13:26:25.063Z" }, ] [[package]] name = "typer" -version = "0.16.0" +version = "0.16.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -5137,18 +6630,18 @@ dependencies = [ { name = "shellingham" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/c5/8c/7d682431efca5fd290017663ea4588bf6f2c6aad085c7f108c5dbc316e70/typer-0.16.0.tar.gz", hash = "sha256:af377ffaee1dbe37ae9440cb4e8f11686ea5ce4e9bae01b84ae7c63b87f1dd3b", size = 102625, upload-time = "2025-05-26T14:30:31.824Z" } +sdist = { url = "https://files.pythonhosted.org/packages/43/78/d90f616bf5f88f8710ad067c1f8705bf7618059836ca084e5bb2a0855d75/typer-0.16.1.tar.gz", hash = "sha256:d358c65a464a7a90f338e3bb7ff0c74ac081449e53884b12ba658cbd72990614", size = 
102836, upload-time = "2025-08-18T19:18:22.898Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/76/42/3efaf858001d2c2913de7f354563e3a3a2f0decae3efe98427125a8f441e/typer-0.16.0-py3-none-any.whl", hash = "sha256:1f79bed11d4d02d4310e3c1b7ba594183bcedb0ac73b27a9e5f28f6fb5b98855", size = 46317, upload-time = "2025-05-26T14:30:30.523Z" }, + { url = "https://files.pythonhosted.org/packages/2d/76/06dbe78f39b2203d2a47d5facc5df5102d0561e2807396471b5f7c5a30a1/typer-0.16.1-py3-none-any.whl", hash = "sha256:90ee01cb02d9b8395ae21ee3368421faf21fa138cb2a541ed369c08cec5237c9", size = 46397, upload-time = "2025-08-18T19:18:21.663Z" }, ] [[package]] name = "types-pyyaml" -version = "6.0.12.20250516" +version = "6.0.12.20250809" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/4e/22/59e2aeb48ceeee1f7cd4537db9568df80d62bdb44a7f9e743502ea8aab9c/types_pyyaml-6.0.12.20250516.tar.gz", hash = "sha256:9f21a70216fc0fa1b216a8176db5f9e0af6eb35d2f2932acb87689d03a5bf6ba", size = 17378, upload-time = "2025-05-16T03:08:04.897Z" } +sdist = { url = "https://files.pythonhosted.org/packages/36/21/52ffdbddea3c826bc2758d811ccd7f766912de009c5cf096bd5ebba44680/types_pyyaml-6.0.12.20250809.tar.gz", hash = "sha256:af4a1aca028f18e75297da2ee0da465f799627370d74073e96fee876524f61b5", size = 17385, upload-time = "2025-08-09T03:14:34.867Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/5f/e0af6f7f6a260d9af67e1db4f54d732abad514252a7a378a6c4d17dd1036/types_pyyaml-6.0.12.20250516-py3-none-any.whl", hash = "sha256:8478208feaeb53a34cb5d970c56a7cd76b72659442e733e268a94dc72b2d0530", size = 20312, upload-time = "2025-05-16T03:08:04.019Z" }, + { url = "https://files.pythonhosted.org/packages/35/3e/0346d09d6e338401ebf406f12eaf9d0b54b315b86f1ec29e34f1a0aedae9/types_pyyaml-6.0.12.20250809-py3-none-any.whl", hash = "sha256:032b6003b798e7de1a1ddfeefee32fac6486bdfe4845e0ae0e7fb3ee4512b52f", size = 20277, upload-time = 
"2025-08-09T03:14:34.055Z" }, ] [[package]] @@ -5174,24 +6667,11 @@ wheels = [ [[package]] name = "typing-extensions" -version = "4.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d1/bc/51647cd02527e87d05cb083ccc402f93e441606ff1f01739a62c8ad09ba5/typing_extensions-4.14.0.tar.gz", hash = "sha256:8676b788e32f02ab42d9e7c61324048ae4c6d844a399eebace3d4979d75ceef4", size = 107423, upload-time = "2025-06-02T14:52:11.399Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/e0/552843e0d356fbb5256d21449fa957fa4eff3bbc135a74a691ee70c7c5da/typing_extensions-4.14.0-py3-none-any.whl", hash = "sha256:a1514509136dd0b477638fc68d6a91497af5076466ad0fa6c338e44e359944af", size = 43839, upload-time = "2025-06-02T14:52:10.026Z" }, -] - -[[package]] -name = "typing-inspect" -version = "0.9.0" +version = "4.14.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } +sdist = { url = "https://files.pythonhosted.org/packages/98/5a/da40306b885cc8c09109dc2e1abd358d5684b1425678151cdaed4731c822/typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36", size = 107673, upload-time = "2025-07-04T13:28:34.16Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, + { url = 
"https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, ] [[package]] @@ -5226,15 +6706,15 @@ wheels = [ [[package]] name = "uvicorn" -version = "0.34.3" +version = "0.35.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/de/ad/713be230bcda622eaa35c28f0d328c3675c371238470abdea52417f17a8e/uvicorn-0.34.3.tar.gz", hash = "sha256:35919a9a979d7a59334b6b10e05d77c1d0d574c50e0fc98b8b1a0f165708b55a", size = 76631, upload-time = "2025-06-01T07:48:17.531Z" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/42/e0e305207bb88c6b8d3061399c6a961ffe5fbb7e2aa63c9234df7259e9cd/uvicorn-0.35.0.tar.gz", hash = "sha256:bc662f087f7cf2ce11a1d7fd70b90c9f98ef2e2831556dd078d131b96cc94a01", size = 78473, upload-time = "2025-06-28T16:15:46.058Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/0d/8adfeaa62945f90d19ddc461c55f4a50c258af7662d34b6a3d5d1f8646f6/uvicorn-0.34.3-py3-none-any.whl", hash = "sha256:16246631db62bdfbf069b0645177d6e8a77ba950cfedbfd093acef9444e4d885", size = 62431, upload-time = "2025-06-01T07:48:15.664Z" }, + { url = "https://files.pythonhosted.org/packages/d2/e2/dc81b1bd1dcfe91735810265e9d26bc8ec5da45b4c0f6237e286819194c3/uvicorn-0.35.0-py3-none-any.whl", hash = "sha256:197535216b25ff9b785e29a0b79199f55222193d47f820816e7da751e9bc8d4a", size = 66406, upload-time = "2025-06-28T16:15:44.816Z" }, ] [package.optional-dependencies] @@ -5270,49 +6750,47 @@ wheels = [ [[package]] name = "virtualenv" -version = "20.31.2" +version = "20.34.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1c/14/37fcdba2808a6c615681cd216fecae00413c9dab44fb2e57805ecf3eaee3/virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a", size = 6003808, upload-time = "2025-08-13T14:24:07.464Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, + { url = "https://files.pythonhosted.org/packages/76/06/04c8e804f813cf972e3262f3f8584c232de64f0cde9f703b46cf53a45090/virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026", size = 5983279, upload-time = "2025-08-13T14:24:05.111Z" }, ] [[package]] name = "vllm" -version = "0.9.0" +version = "0.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "blake3" }, { name = "cachetools" }, + { name = "cbor2" }, { name = "cloudpickle" }, { name = "compressed-tensors" }, { name = "depyf" }, + { name = "diskcache" }, { name = "einops" }, { name = "fastapi", extra = ["standard"] }, { name = "filelock" }, { name = "gguf" }, - { name = "huggingface-hub", extra = ["hf-xet"] }, { name = "lark" }, { name = "llguidance", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, { name = "lm-format-enforcer" }, - { name = "mistral-common", extra = ["opencv"] }, + { name = "mistral-common", extra = ["audio", "image"] }, { name = "msgspec" }, { name = "ninja" }, 
{ name = "numba" }, { name = "numpy" }, { name = "openai" }, + { name = "openai-harmony" }, { name = "opencv-python-headless" }, - { name = "opentelemetry-api" }, - { name = "opentelemetry-exporter-otlp" }, - { name = "opentelemetry-sdk" }, - { name = "opentelemetry-semantic-conventions-ai" }, - { name = "outlines" }, + { name = "outlines-core" }, { name = "partial-json-parser" }, { name = "pillow" }, { name = "prometheus-client" }, @@ -5320,6 +6798,7 @@ dependencies = [ { name = "protobuf" }, { name = "psutil" }, { name = "py-cpuinfo" }, + { name = "pybase64" }, { name = "pydantic" }, { name = "python-json-logger" }, { name = "pyyaml" }, @@ -5329,23 +6808,28 @@ dependencies = [ { name = "requests" }, { name = "scipy" }, { name = "sentencepiece" }, + { name = "setproctitle" }, { name = "setuptools" }, { name = "six" }, { name = "tiktoken" }, { name = "tokenizers" }, - { name = "torch" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, { name = "torchaudio" }, - { name = "torchvision" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" }, + { name = "torchvision", version = "0.23.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torchvision", version = "0.23.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, { name = "tqdm" }, { name = "transformers" }, { name = "typing-extensions" }, { name = "watchfiles" }, { name = "xformers", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - 
{ name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'x86_64'" }, + { name = "xgrammar", marker = "platform_machine == 'aarch64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/26/e6/b4703aa8a0ce74984471cbe36b0f8c715e198ff68ca19eb0b22600703488/vllm-0.9.0.tar.gz", hash = "sha256:78e3e71fbbd1adfc3b7095b4ab4b6d341c0cffc9e8b96f0c74db275d60108902", size = 8551061, upload-time = "2025-05-28T01:30:40.781Z" } +sdist = { url = "https://files.pythonhosted.org/packages/82/5a/36d2351206f4d8d871b10780f874d03957985e08298d430cc837723e07af/vllm-0.11.0.tar.gz", hash = "sha256:f435a64c24e9c4178d657a76f8edd8548ddc444012f7d06a9f79ac3a6392bfae", size = 10822208, upload-time = "2025-10-04T01:39:57.798Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cd/08/cb88fd52c08df57ccc4f722241150643d521b3174f8d0c3a1ec5549c3927/vllm-0.9.0-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:a130715cc915377f78e84088fc35c426266e278a0793be0b2ad78deda2e2f55e", size = 377192911, upload-time = "2025-05-28T01:30:28.547Z" }, + { url = "https://files.pythonhosted.org/packages/47/33/d19e0763c34392ec956534536fa837c060495bfff31ed83452135ea7608d/vllm-0.11.0-cp38-abi3-manylinux1_x86_64.whl", hash = "sha256:3861c75ff2b12e24f6d179ff5c084d791b42ded8675d76c8706697c79f68cd62", size = 438217982, upload-time = "2025-10-04T01:39:32.382Z" }, + { url = "https://files.pythonhosted.org/packages/d7/bf/973444bb959fc7acbbeb3d226bd4d135dcd49b6af174b29aab1b50e2d710/vllm-0.11.0-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:52369c9ee949944354bdc7afc88ded2d1ed02b098bf90db06cf80098a19787b7", size = 401003969, upload-time = "2025-10-04T01:39:50.251Z" }, ] [[package]] @@ -5359,7 +6843,7 @@ wheels = [ [[package]] name = "wandb" -version = "0.20.1" +version = "0.21.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, @@ -5367,26 +6851,23 @@ dependencies = [ { name = 
"packaging" }, { name = "platformdirs" }, { name = "protobuf" }, - { name = "psutil" }, { name = "pydantic" }, { name = "pyyaml" }, { name = "requests" }, { name = "sentry-sdk" }, - { name = "setproctitle" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/62/1f/92be0ca87fb49eb48c16dcf0845a3579a57c4734fec2b95862cf5a0494a0/wandb-0.20.1.tar.gz", hash = "sha256:dbd3fc60dfe7bf83c4de24b206b99b44949fef323f817a783883db72fc5f3bfe", size = 40320062, upload-time = "2025-06-05T00:00:24.483Z" } +sdist = { url = "https://files.pythonhosted.org/packages/26/69/217598886af89350e36bc05c092a67c9c469cff1fd6446edd4c879027e36/wandb-0.21.1.tar.gz", hash = "sha256:753bbdaa3a7703344056e019425b39c17a3d31d8ca0c4d13c4efc046935b08b9", size = 40131395, upload-time = "2025-08-07T18:52:48.85Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c9/18/afcc37d0b93dd6f6d0f0c5683b9cfff9416ae1539931f58932a2938c0070/wandb-0.20.1-py3-none-any.whl", hash = "sha256:e6395cabf074247042be1cf0dc6ab0b06aa4c9538c2e1fdc5b507a690ce0cf17", size = 6458872, upload-time = "2025-06-04T23:59:55.441Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b5/70f9e2a3d1380b729ae5853763d938edc50072df357f79bbd19b9aae8e3f/wandb-0.20.1-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:2475a48c693adf677d40da9e1c8ceeaf86d745ffc3b7e3535731279d02f9e845", size = 22517483, upload-time = "2025-06-04T23:59:58.687Z" }, - { url = "https://files.pythonhosted.org/packages/cc/7e/4eb9aeb2fd974d410a8f2eb11b0219536503913a050d46a03206151705c8/wandb-0.20.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:99cce804c31ec1e0d1e691650a7d51773ed7329c41745d56384fa3655a0e9b2c", size = 22034511, upload-time = "2025-06-05T00:00:01.301Z" }, - { url = "https://files.pythonhosted.org/packages/34/38/1df22c2273e6f7ab0aae4fd032085d6d92ab112f5b261646e7dc5e675cfe/wandb-0.20.1-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:ce3ee412677a1679e04b21e03a91e1e02eb90faf658d682bee86c33cf5f32e09", size = 
22720771, upload-time = "2025-06-05T00:00:04.122Z" }, - { url = "https://files.pythonhosted.org/packages/38/96/78fc7a7ea7158d136c84f481423f8736c9346a2387287ec8a6d92019975c/wandb-0.20.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e58ca32c7147161158f09b0fb5f5896876f8569d0d10ae7b64d0510c868ce33", size = 21537453, upload-time = "2025-06-05T00:00:09.474Z" }, - { url = "https://files.pythonhosted.org/packages/88/c9/41b8bdb493e5eda32b502bc1cc49d539335a92cacaf0ef304d7dae0240aa/wandb-0.20.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:591506ecbdd396648cc323ba270f3ab4aed3158e1dbfa7636c09f9f7f0253e1c", size = 23161349, upload-time = "2025-06-05T00:00:11.903Z" }, - { url = "https://files.pythonhosted.org/packages/7d/f2/79e783cc50a47d373dfbda862eb5396de8139167e8c6443a16ef0166106f/wandb-0.20.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:382508532db09893f81cc926b1d333caa4c8a7db057878899fadf929bbdb3b56", size = 21550624, upload-time = "2025-06-05T00:00:14.28Z" }, - { url = "https://files.pythonhosted.org/packages/26/32/23890a726302e7be28bda9fff47ce9b491af64e339aba4d32b3b8d1a7aaf/wandb-0.20.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:29ea495e49393db860f17437fe37e48018da90436ce10949b471780f09293bd7", size = 23237996, upload-time = "2025-06-05T00:00:16.647Z" }, - { url = "https://files.pythonhosted.org/packages/af/94/296e520b086b2a4f10e99bcea3cd5856421b9c004824663501e3789a713b/wandb-0.20.1-py3-none-win32.whl", hash = "sha256:455ee0a652e59ab1e4b546fa1dc833dd3063aa7e64eb8abf95d22f0e9f08c574", size = 22518456, upload-time = "2025-06-05T00:00:19.006Z" }, - { url = "https://files.pythonhosted.org/packages/52/5f/c44ad7b2a062ca5f4da99ae475cea274c38f6ec37bdaca1b1c653ee87274/wandb-0.20.1-py3-none-win_amd64.whl", hash = "sha256:6d2431652f096b7e394c29a99135a6441c02ed3198b963f0b351a5b5e56aeca0", size = 22518459, upload-time = "2025-06-05T00:00:21.374Z" }, + { url = 
"https://files.pythonhosted.org/packages/65/d0/589f970741f3ead9ad28d4cbb668d1e6a39848df767f004ac9c7bed8f4b5/wandb-0.21.1-py3-none-macosx_10_14_x86_64.whl", hash = "sha256:96f9eedeae428de0d88f9751fb81f1b730ae7902f35c2f5a7a904d7733f124f3", size = 21701698, upload-time = "2025-08-07T18:52:22.399Z" }, + { url = "https://files.pythonhosted.org/packages/41/6c/a6140a0f395a99902aafdfe63088b7aff509e4f14cd7dd084d47eab36f27/wandb-0.21.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:41a1ec1b98d9d7e1bcafc483bce82e184b6cbae7531328a0fe8dd0f56d96a92e", size = 21221046, upload-time = "2025-08-07T18:52:26.134Z" }, + { url = "https://files.pythonhosted.org/packages/e9/d8/dacbb30ed35141d48a387d84f2e792d4b61b5bcdbf5ffdbd3f0b57beb346/wandb-0.21.1-py3-none-macosx_11_0_x86_64.whl", hash = "sha256:f74d4691c38318ed8611e00ca3246b4152a03ff390fdce41816bea5705452a73", size = 21885803, upload-time = "2025-08-07T18:52:28.489Z" }, + { url = "https://files.pythonhosted.org/packages/b0/48/3a7290a33b1f64e29ac8779dab4d4cdef31a9ed3c3d9ea656a4507d64332/wandb-0.21.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c8fbd60b9abf4b9bec201f311602f61394d41a3503c801750b03975a5e36d1b", size = 20825318, upload-time = "2025-08-07T18:52:31.282Z" }, + { url = "https://files.pythonhosted.org/packages/a9/54/c0a087114ff1bb6c32e64aaa58aea4342cebc0ad58b1378c0a5a831d2508/wandb-0.21.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ded9313672630c0630f5b13c598ce9aa0e932e811ebc18823fcc4d73acfb6bb", size = 22362500, upload-time = "2025-08-07T18:52:33.889Z" }, + { url = "https://files.pythonhosted.org/packages/65/68/3aae277ea9fb5d91eec066cf256755bed3a740d92b539888a7ce36cf3f6c/wandb-0.21.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:44f3194d697b409f91708c50c5f9d56e282434a0d60ac380b64f0fb6991cd630", size = 20830372, upload-time = "2025-08-07T18:52:36.76Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/bb/58d206e79be1f279ef06cb934ae1e208bcacd2cd73b7a7652236575010d6/wandb-0.21.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e0b68bb6dbe94f1910c665c755f438292df40c272feb1a8b42208c1df52cce26", size = 22438521, upload-time = "2025-08-07T18:52:39.672Z" }, + { url = "https://files.pythonhosted.org/packages/e7/b8/dfe01f8e4c40d5dda820fd839c39431608a3453670f79404fa28915972d2/wandb-0.21.1-py3-none-win32.whl", hash = "sha256:98306c3fb369dfafb7194270b938b000ea2bb08dbddff10c19b5a805fd5cab80", size = 21569814, upload-time = "2025-08-07T18:52:42.58Z" }, + { url = "https://files.pythonhosted.org/packages/51/ba/81c77d5d831fcddb89661c85175fcbb91d2ffecf6b0591972829da3eb42f/wandb-0.21.1-py3-none-win_amd64.whl", hash = "sha256:8be92a7e92b5cb5ce00ec0961f9dbaad7757ffdbc5b5a8f2cc7188e23f653f0a", size = 21569817, upload-time = "2025-08-07T18:52:45.559Z" }, ] [[package]] @@ -5458,25 +6939,11 @@ wheels = [ [[package]] name = "wcwidth" -version = "0.2.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, -] - -[[package]] -name = "webdataset" -version = "1.0.2" +version = "0.2.14" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "braceexpand" }, - { name = "numpy" }, - { name = "pyyaml" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } +sdist = { url = "https://files.pythonhosted.org/packages/24/30/6b0809f4510673dc723187aeaf24c7f5459922d01e2f794277a3dfb90345/wcwidth-0.2.14.tar.gz", hash = "sha256:4d478375d31bc5395a3c55c40ccdf3354688364cd61c4f6adacaa9215d0b3605", size = 102293, upload-time = "2025-09-22T16:29:53.023Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/00/aca6beb3658dab4ed3dbb41a78e6e7f31342e0b41d28088f205525751601/webdataset-1.0.2-py3-none-any.whl", hash = "sha256:3dbfced32b25c0d199c6b9787937b6f85742bc3c84f652c846893075c1c082d9", size = 74956, upload-time = "2025-06-19T23:26:20.354Z" }, + { url = "https://files.pythonhosted.org/packages/af/b5/123f13c975e9f27ab9c0770f514345bd406d0e8d3b7a0723af9d43f710af/wcwidth-0.2.14-py2.py3-none-any.whl", hash = "sha256:a7bb560c8aee30f9957e5f9895805edd20602f2d7f720186dfd906e82b4982e1", size = 37286, upload-time = "2025-09-22T16:29:51.641Z" }, ] [[package]] @@ -5522,90 +6989,93 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498, upload-time = "2024-11-08T15:52:16.132Z" }, ] -[[package]] -name = "wget" -version = "3.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip", hash = "sha256:35e630eca2aa50ce998b9b1a127bb26b30dfee573702782aa982f875e3f16061", size = 10857, upload-time = "2015-10-22T15:26:37.51Z" } - [[package]] name = "wrapt" -version = "1.17.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/c3/fc/e91cc220803d7bc4db93fb02facd8461c37364151b8494762cc88b0fbcef/wrapt-1.17.2.tar.gz", hash = "sha256:41388e9d4d1522446fe79d3213196bd9e3b301a336965b9e27ca2788ebd122f3", size = 55531, upload-time = "2025-01-14T10:35:45.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/bd/ab55f849fd1f9a58ed7ea47f5559ff09741b25f00c191231f9f059c83949/wrapt-1.17.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:d5e2439eecc762cd85e7bd37161d4714aa03a33c5ba884e26c81559817ca0925", size = 53799, upload-time = "2025-01-14T10:33:57.4Z" }, - { url = "https://files.pythonhosted.org/packages/53/18/75ddc64c3f63988f5a1d7e10fb204ffe5762bc663f8023f18ecaf31a332e/wrapt-1.17.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3fc7cb4c1c744f8c05cd5f9438a3caa6ab94ce8344e952d7c45a8ed59dd88392", size = 38821, upload-time = "2025-01-14T10:33:59.334Z" }, - { url = "https://files.pythonhosted.org/packages/48/2a/97928387d6ed1c1ebbfd4efc4133a0633546bec8481a2dd5ec961313a1c7/wrapt-1.17.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fdbdb757d5390f7c675e558fd3186d590973244fab0c5fe63d373ade3e99d40", size = 38919, upload-time = "2025-01-14T10:34:04.093Z" }, - { url = "https://files.pythonhosted.org/packages/73/54/3bfe5a1febbbccb7a2f77de47b989c0b85ed3a6a41614b104204a788c20e/wrapt-1.17.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb1d0dbf99411f3d871deb6faa9aabb9d4e744d67dcaaa05399af89d847a91d", size = 88721, upload-time = "2025-01-14T10:34:07.163Z" }, - { url = "https://files.pythonhosted.org/packages/25/cb/7262bc1b0300b4b64af50c2720ef958c2c1917525238d661c3e9a2b71b7b/wrapt-1.17.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d18a4865f46b8579d44e4fe1e2bcbc6472ad83d98e22a26c963d46e4c125ef0b", size = 80899, upload-time = "2025-01-14T10:34:09.82Z" }, - { url = 
"https://files.pythonhosted.org/packages/2a/5a/04cde32b07a7431d4ed0553a76fdb7a61270e78c5fd5a603e190ac389f14/wrapt-1.17.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc570b5f14a79734437cb7b0500376b6b791153314986074486e0b0fa8d71d98", size = 89222, upload-time = "2025-01-14T10:34:11.258Z" }, - { url = "https://files.pythonhosted.org/packages/09/28/2e45a4f4771fcfb109e244d5dbe54259e970362a311b67a965555ba65026/wrapt-1.17.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6d9187b01bebc3875bac9b087948a2bccefe464a7d8f627cf6e48b1bbae30f82", size = 86707, upload-time = "2025-01-14T10:34:12.49Z" }, - { url = "https://files.pythonhosted.org/packages/c6/d2/dcb56bf5f32fcd4bd9aacc77b50a539abdd5b6536872413fd3f428b21bed/wrapt-1.17.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:9e8659775f1adf02eb1e6f109751268e493c73716ca5761f8acb695e52a756ae", size = 79685, upload-time = "2025-01-14T10:34:15.043Z" }, - { url = "https://files.pythonhosted.org/packages/80/4e/eb8b353e36711347893f502ce91c770b0b0929f8f0bed2670a6856e667a9/wrapt-1.17.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e8b2816ebef96d83657b56306152a93909a83f23994f4b30ad4573b00bd11bb9", size = 87567, upload-time = "2025-01-14T10:34:16.563Z" }, - { url = "https://files.pythonhosted.org/packages/17/27/4fe749a54e7fae6e7146f1c7d914d28ef599dacd4416566c055564080fe2/wrapt-1.17.2-cp312-cp312-win32.whl", hash = "sha256:468090021f391fe0056ad3e807e3d9034e0fd01adcd3bdfba977b6fdf4213ea9", size = 36672, upload-time = "2025-01-14T10:34:17.727Z" }, - { url = "https://files.pythonhosted.org/packages/15/06/1dbf478ea45c03e78a6a8c4be4fdc3c3bddea5c8de8a93bc971415e47f0f/wrapt-1.17.2-cp312-cp312-win_amd64.whl", hash = "sha256:ec89ed91f2fa8e3f52ae53cd3cf640d6feff92ba90d62236a81e4e563ac0e991", size = 38865, upload-time = "2025-01-14T10:34:19.577Z" }, - { url = 
"https://files.pythonhosted.org/packages/ce/b9/0ffd557a92f3b11d4c5d5e0c5e4ad057bd9eb8586615cdaf901409920b14/wrapt-1.17.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6ed6ffac43aecfe6d86ec5b74b06a5be33d5bb9243d055141e8cabb12aa08125", size = 53800, upload-time = "2025-01-14T10:34:21.571Z" }, - { url = "https://files.pythonhosted.org/packages/c0/ef/8be90a0b7e73c32e550c73cfb2fa09db62234227ece47b0e80a05073b375/wrapt-1.17.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:35621ae4c00e056adb0009f8e86e28eb4a41a4bfa8f9bfa9fca7d343fe94f998", size = 38824, upload-time = "2025-01-14T10:34:22.999Z" }, - { url = "https://files.pythonhosted.org/packages/36/89/0aae34c10fe524cce30fe5fc433210376bce94cf74d05b0d68344c8ba46e/wrapt-1.17.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a604bf7a053f8362d27eb9fefd2097f82600b856d5abe996d623babd067b1ab5", size = 38920, upload-time = "2025-01-14T10:34:25.386Z" }, - { url = "https://files.pythonhosted.org/packages/3b/24/11c4510de906d77e0cfb5197f1b1445d4fec42c9a39ea853d482698ac681/wrapt-1.17.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cbabee4f083b6b4cd282f5b817a867cf0b1028c54d445b7ec7cfe6505057cf8", size = 88690, upload-time = "2025-01-14T10:34:28.058Z" }, - { url = "https://files.pythonhosted.org/packages/71/d7/cfcf842291267bf455b3e266c0c29dcb675b5540ee8b50ba1699abf3af45/wrapt-1.17.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:49703ce2ddc220df165bd2962f8e03b84c89fee2d65e1c24a7defff6f988f4d6", size = 80861, upload-time = "2025-01-14T10:34:29.167Z" }, - { url = "https://files.pythonhosted.org/packages/d5/66/5d973e9f3e7370fd686fb47a9af3319418ed925c27d72ce16b791231576d/wrapt-1.17.2-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8112e52c5822fc4253f3901b676c55ddf288614dc7011634e2719718eaa187dc", size = 89174, upload-time = "2025-01-14T10:34:31.702Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/d3/8e17bb70f6ae25dabc1aaf990f86824e4fd98ee9cadf197054e068500d27/wrapt-1.17.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fee687dce376205d9a494e9c121e27183b2a3df18037f89d69bd7b35bcf59e2", size = 86721, upload-time = "2025-01-14T10:34:32.91Z" }, - { url = "https://files.pythonhosted.org/packages/6f/54/f170dfb278fe1c30d0ff864513cff526d624ab8de3254b20abb9cffedc24/wrapt-1.17.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:18983c537e04d11cf027fbb60a1e8dfd5190e2b60cc27bc0808e653e7b218d1b", size = 79763, upload-time = "2025-01-14T10:34:34.903Z" }, - { url = "https://files.pythonhosted.org/packages/4a/98/de07243751f1c4a9b15c76019250210dd3486ce098c3d80d5f729cba029c/wrapt-1.17.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:703919b1633412ab54bcf920ab388735832fdcb9f9a00ae49387f0fe67dad504", size = 87585, upload-time = "2025-01-14T10:34:36.13Z" }, - { url = "https://files.pythonhosted.org/packages/f9/f0/13925f4bd6548013038cdeb11ee2cbd4e37c30f8bfd5db9e5a2a370d6e20/wrapt-1.17.2-cp313-cp313-win32.whl", hash = "sha256:abbb9e76177c35d4e8568e58650aa6926040d6a9f6f03435b7a522bf1c487f9a", size = 36676, upload-time = "2025-01-14T10:34:37.962Z" }, - { url = "https://files.pythonhosted.org/packages/bf/ae/743f16ef8c2e3628df3ddfd652b7d4c555d12c84b53f3d8218498f4ade9b/wrapt-1.17.2-cp313-cp313-win_amd64.whl", hash = "sha256:69606d7bb691b50a4240ce6b22ebb319c1cfb164e5f6569835058196e0f3a845", size = 38871, upload-time = "2025-01-14T10:34:39.13Z" }, - { url = "https://files.pythonhosted.org/packages/3d/bc/30f903f891a82d402ffb5fda27ec1d621cc97cb74c16fea0b6141f1d4e87/wrapt-1.17.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4a721d3c943dae44f8e243b380cb645a709ba5bd35d3ad27bc2ed947e9c68192", size = 56312, upload-time = "2025-01-14T10:34:40.604Z" }, - { url = "https://files.pythonhosted.org/packages/8a/04/c97273eb491b5f1c918857cd26f314b74fc9b29224521f5b83f872253725/wrapt-1.17.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash 
= "sha256:766d8bbefcb9e00c3ac3b000d9acc51f1b399513f44d77dfe0eb026ad7c9a19b", size = 40062, upload-time = "2025-01-14T10:34:45.011Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ca/3b7afa1eae3a9e7fefe499db9b96813f41828b9fdb016ee836c4c379dadb/wrapt-1.17.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e496a8ce2c256da1eb98bd15803a79bee00fc351f5dfb9ea82594a3f058309e0", size = 40155, upload-time = "2025-01-14T10:34:47.25Z" }, - { url = "https://files.pythonhosted.org/packages/89/be/7c1baed43290775cb9030c774bc53c860db140397047cc49aedaf0a15477/wrapt-1.17.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d615e4fe22f4ad3528448c193b218e077656ca9ccb22ce2cb20db730f8d306", size = 113471, upload-time = "2025-01-14T10:34:50.934Z" }, - { url = "https://files.pythonhosted.org/packages/32/98/4ed894cf012b6d6aae5f5cc974006bdeb92f0241775addad3f8cd6ab71c8/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5aaeff38654462bc4b09023918b7f21790efb807f54c000a39d41d69cf552cb", size = 101208, upload-time = "2025-01-14T10:34:52.297Z" }, - { url = "https://files.pythonhosted.org/packages/ea/fd/0c30f2301ca94e655e5e057012e83284ce8c545df7661a78d8bfca2fac7a/wrapt-1.17.2-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7d15bbd2bc99e92e39f49a04653062ee6085c0e18b3b7512a4f2fe91f2d681", size = 109339, upload-time = "2025-01-14T10:34:53.489Z" }, - { url = "https://files.pythonhosted.org/packages/75/56/05d000de894c4cfcb84bcd6b1df6214297b8089a7bd324c21a4765e49b14/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e3890b508a23299083e065f435a492b5435eba6e304a7114d2f919d400888cc6", size = 110232, upload-time = "2025-01-14T10:34:55.327Z" }, - { url = "https://files.pythonhosted.org/packages/53/f8/c3f6b2cf9b9277fb0813418e1503e68414cd036b3b099c823379c9575e6d/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_i686.whl", 
hash = "sha256:8c8b293cd65ad716d13d8dd3624e42e5a19cc2a2f1acc74b30c2c13f15cb61a6", size = 100476, upload-time = "2025-01-14T10:34:58.055Z" }, - { url = "https://files.pythonhosted.org/packages/a7/b1/0bb11e29aa5139d90b770ebbfa167267b1fc548d2302c30c8f7572851738/wrapt-1.17.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c82b8785d98cdd9fed4cac84d765d234ed3251bd6afe34cb7ac523cb93e8b4f", size = 106377, upload-time = "2025-01-14T10:34:59.3Z" }, - { url = "https://files.pythonhosted.org/packages/6a/e1/0122853035b40b3f333bbb25f1939fc1045e21dd518f7f0922b60c156f7c/wrapt-1.17.2-cp313-cp313t-win32.whl", hash = "sha256:13e6afb7fe71fe7485a4550a8844cc9ffbe263c0f1a1eea569bc7091d4898555", size = 37986, upload-time = "2025-01-14T10:35:00.498Z" }, - { url = "https://files.pythonhosted.org/packages/09/5e/1655cf481e079c1f22d0cabdd4e51733679932718dc23bf2db175f329b76/wrapt-1.17.2-cp313-cp313t-win_amd64.whl", hash = "sha256:eaf675418ed6b3b31c7a989fd007fa7c3be66ce14e5c3b27336383604c9da85c", size = 40750, upload-time = "2025-01-14T10:35:03.378Z" }, - { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594, upload-time = "2025-01-14T10:35:44.018Z" }, +version = "1.17.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/41/cad1aba93e752f1f9268c77270da3c469883d56e2798e7df6240dcb2287b/wrapt-1.17.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ab232e7fdb44cdfbf55fc3afa31bcdb0d8980b9b95c38b6405df2acb672af0e0", size = 53998, upload-time = 
"2025-08-12T05:51:47.138Z" }, + { url = "https://files.pythonhosted.org/packages/60/f8/096a7cc13097a1869fe44efe68dace40d2a16ecb853141394047f0780b96/wrapt-1.17.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9baa544e6acc91130e926e8c802a17f3b16fbea0fd441b5a60f5cf2cc5c3deba", size = 39020, upload-time = "2025-08-12T05:51:35.906Z" }, + { url = "https://files.pythonhosted.org/packages/33/df/bdf864b8997aab4febb96a9ae5c124f700a5abd9b5e13d2a3214ec4be705/wrapt-1.17.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6b538e31eca1a7ea4605e44f81a48aa24c4632a277431a6ed3f328835901f4fd", size = 39098, upload-time = "2025-08-12T05:51:57.474Z" }, + { url = "https://files.pythonhosted.org/packages/9f/81/5d931d78d0eb732b95dc3ddaeeb71c8bb572fb01356e9133916cd729ecdd/wrapt-1.17.3-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:042ec3bb8f319c147b1301f2393bc19dba6e176b7da446853406d041c36c7828", size = 88036, upload-time = "2025-08-12T05:52:34.784Z" }, + { url = "https://files.pythonhosted.org/packages/ca/38/2e1785df03b3d72d34fc6252d91d9d12dc27a5c89caef3335a1bbb8908ca/wrapt-1.17.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3af60380ba0b7b5aeb329bc4e402acd25bd877e98b3727b0135cb5c2efdaefe9", size = 88156, upload-time = "2025-08-12T05:52:13.599Z" }, + { url = "https://files.pythonhosted.org/packages/b3/8b/48cdb60fe0603e34e05cffda0b2a4adab81fd43718e11111a4b0100fd7c1/wrapt-1.17.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b02e424deef65c9f7326d8c19220a2c9040c51dc165cddb732f16198c168396", size = 87102, upload-time = "2025-08-12T05:52:14.56Z" }, + { url = "https://files.pythonhosted.org/packages/3c/51/d81abca783b58f40a154f1b2c56db1d2d9e0d04fa2d4224e357529f57a57/wrapt-1.17.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:74afa28374a3c3a11b3b5e5fca0ae03bef8450d6aa3ab3a1e2c30e3a75d023dc", size = 87732, upload-time = "2025-08-12T05:52:36.165Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/b1/43b286ca1392a006d5336412d41663eeef1ad57485f3e52c767376ba7e5a/wrapt-1.17.3-cp312-cp312-win32.whl", hash = "sha256:4da9f45279fff3543c371d5ababc57a0384f70be244de7759c85a7f989cb4ebe", size = 36705, upload-time = "2025-08-12T05:53:07.123Z" }, + { url = "https://files.pythonhosted.org/packages/28/de/49493f962bd3c586ab4b88066e967aa2e0703d6ef2c43aa28cb83bf7b507/wrapt-1.17.3-cp312-cp312-win_amd64.whl", hash = "sha256:e71d5c6ebac14875668a1e90baf2ea0ef5b7ac7918355850c0908ae82bcb297c", size = 38877, upload-time = "2025-08-12T05:53:05.436Z" }, + { url = "https://files.pythonhosted.org/packages/f1/48/0f7102fe9cb1e8a5a77f80d4f0956d62d97034bbe88d33e94699f99d181d/wrapt-1.17.3-cp312-cp312-win_arm64.whl", hash = "sha256:604d076c55e2fdd4c1c03d06dc1a31b95130010517b5019db15365ec4a405fc6", size = 36885, upload-time = "2025-08-12T05:52:54.367Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f6/759ece88472157acb55fc195e5b116e06730f1b651b5b314c66291729193/wrapt-1.17.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a47681378a0439215912ef542c45a783484d4dd82bac412b71e59cf9c0e1cea0", size = 54003, upload-time = "2025-08-12T05:51:48.627Z" }, + { url = "https://files.pythonhosted.org/packages/4f/a9/49940b9dc6d47027dc850c116d79b4155f15c08547d04db0f07121499347/wrapt-1.17.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54a30837587c6ee3cd1a4d1c2ec5d24e77984d44e2f34547e2323ddb4e22eb77", size = 39025, upload-time = "2025-08-12T05:51:37.156Z" }, + { url = "https://files.pythonhosted.org/packages/45/35/6a08de0f2c96dcdd7fe464d7420ddb9a7655a6561150e5fc4da9356aeaab/wrapt-1.17.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:16ecf15d6af39246fe33e507105d67e4b81d8f8d2c6598ff7e3ca1b8a37213f7", size = 39108, upload-time = "2025-08-12T05:51:58.425Z" }, + { url = 
"https://files.pythonhosted.org/packages/0c/37/6faf15cfa41bf1f3dba80cd3f5ccc6622dfccb660ab26ed79f0178c7497f/wrapt-1.17.3-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:6fd1ad24dc235e4ab88cda009e19bf347aabb975e44fd5c2fb22a3f6e4141277", size = 88072, upload-time = "2025-08-12T05:52:37.53Z" }, + { url = "https://files.pythonhosted.org/packages/78/f2/efe19ada4a38e4e15b6dff39c3e3f3f73f5decf901f66e6f72fe79623a06/wrapt-1.17.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ed61b7c2d49cee3c027372df5809a59d60cf1b6c2f81ee980a091f3afed6a2d", size = 88214, upload-time = "2025-08-12T05:52:15.886Z" }, + { url = "https://files.pythonhosted.org/packages/40/90/ca86701e9de1622b16e09689fc24b76f69b06bb0150990f6f4e8b0eeb576/wrapt-1.17.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:423ed5420ad5f5529db9ce89eac09c8a2f97da18eb1c870237e84c5a5c2d60aa", size = 87105, upload-time = "2025-08-12T05:52:17.914Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/d10bd257c9a3e15cbf5523025252cc14d77468e8ed644aafb2d6f54cb95d/wrapt-1.17.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e01375f275f010fcbf7f643b4279896d04e571889b8a5b3f848423d91bf07050", size = 87766, upload-time = "2025-08-12T05:52:39.243Z" }, + { url = "https://files.pythonhosted.org/packages/e8/cf/7d848740203c7b4b27eb55dbfede11aca974a51c3d894f6cc4b865f42f58/wrapt-1.17.3-cp313-cp313-win32.whl", hash = "sha256:53e5e39ff71b3fc484df8a522c933ea2b7cdd0d5d15ae82e5b23fde87d44cbd8", size = 36711, upload-time = "2025-08-12T05:53:10.074Z" }, + { url = "https://files.pythonhosted.org/packages/57/54/35a84d0a4d23ea675994104e667ceff49227ce473ba6a59ba2c84f250b74/wrapt-1.17.3-cp313-cp313-win_amd64.whl", hash = "sha256:1f0b2f40cf341ee8cc1a97d51ff50dddb9fcc73241b9143ec74b30fc4f44f6cb", size = 38885, upload-time = "2025-08-12T05:53:08.695Z" }, + { url = 
"https://files.pythonhosted.org/packages/01/77/66e54407c59d7b02a3c4e0af3783168fff8e5d61def52cda8728439d86bc/wrapt-1.17.3-cp313-cp313-win_arm64.whl", hash = "sha256:7425ac3c54430f5fc5e7b6f41d41e704db073309acfc09305816bc6a0b26bb16", size = 36896, upload-time = "2025-08-12T05:52:55.34Z" }, + { url = "https://files.pythonhosted.org/packages/02/a2/cd864b2a14f20d14f4c496fab97802001560f9f41554eef6df201cd7f76c/wrapt-1.17.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:cf30f6e3c077c8e6a9a7809c94551203c8843e74ba0c960f4a98cd80d4665d39", size = 54132, upload-time = "2025-08-12T05:51:49.864Z" }, + { url = "https://files.pythonhosted.org/packages/d5/46/d011725b0c89e853dc44cceb738a307cde5d240d023d6d40a82d1b4e1182/wrapt-1.17.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:e228514a06843cae89621384cfe3a80418f3c04aadf8a3b14e46a7be704e4235", size = 39091, upload-time = "2025-08-12T05:51:38.935Z" }, + { url = "https://files.pythonhosted.org/packages/2e/9e/3ad852d77c35aae7ddebdbc3b6d35ec8013af7d7dddad0ad911f3d891dae/wrapt-1.17.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:5ea5eb3c0c071862997d6f3e02af1d055f381b1d25b286b9d6644b79db77657c", size = 39172, upload-time = "2025-08-12T05:51:59.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/f7/c983d2762bcce2326c317c26a6a1e7016f7eb039c27cdf5c4e30f4160f31/wrapt-1.17.3-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:281262213373b6d5e4bb4353bc36d1ba4084e6d6b5d242863721ef2bf2c2930b", size = 87163, upload-time = "2025-08-12T05:52:40.965Z" }, + { url = "https://files.pythonhosted.org/packages/e4/0f/f673f75d489c7f22d17fe0193e84b41540d962f75fce579cf6873167c29b/wrapt-1.17.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dc4a8d2b25efb6681ecacad42fca8859f88092d8732b170de6a5dddd80a1c8fa", size = 87963, upload-time = "2025-08-12T05:52:20.326Z" }, + { url = 
"https://files.pythonhosted.org/packages/df/61/515ad6caca68995da2fac7a6af97faab8f78ebe3bf4f761e1b77efbc47b5/wrapt-1.17.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:373342dd05b1d07d752cecbec0c41817231f29f3a89aa8b8843f7b95992ed0c7", size = 86945, upload-time = "2025-08-12T05:52:21.581Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/4e70162ce398462a467bc09e768bee112f1412e563620adc353de9055d33/wrapt-1.17.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d40770d7c0fd5cbed9d84b2c3f2e156431a12c9a37dc6284060fb4bec0b7ffd4", size = 86857, upload-time = "2025-08-12T05:52:43.043Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b8/da8560695e9284810b8d3df8a19396a6e40e7518059584a1a394a2b35e0a/wrapt-1.17.3-cp314-cp314-win32.whl", hash = "sha256:fbd3c8319de8e1dc79d346929cd71d523622da527cca14e0c1d257e31c2b8b10", size = 37178, upload-time = "2025-08-12T05:53:12.605Z" }, + { url = "https://files.pythonhosted.org/packages/db/c8/b71eeb192c440d67a5a0449aaee2310a1a1e8eca41676046f99ed2487e9f/wrapt-1.17.3-cp314-cp314-win_amd64.whl", hash = "sha256:e1a4120ae5705f673727d3253de3ed0e016f7cd78dc463db1b31e2463e1f3cf6", size = 39310, upload-time = "2025-08-12T05:53:11.106Z" }, + { url = "https://files.pythonhosted.org/packages/45/20/2cda20fd4865fa40f86f6c46ed37a2a8356a7a2fde0773269311f2af56c7/wrapt-1.17.3-cp314-cp314-win_arm64.whl", hash = "sha256:507553480670cab08a800b9463bdb881b2edeed77dc677b0a5915e6106e91a58", size = 37266, upload-time = "2025-08-12T05:52:56.531Z" }, + { url = "https://files.pythonhosted.org/packages/77/ed/dd5cf21aec36c80443c6f900449260b80e2a65cf963668eaef3b9accce36/wrapt-1.17.3-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:ed7c635ae45cfbc1a7371f708727bf74690daedc49b4dba310590ca0bd28aa8a", size = 56544, upload-time = "2025-08-12T05:51:51.109Z" }, + { url = "https://files.pythonhosted.org/packages/8d/96/450c651cc753877ad100c7949ab4d2e2ecc4d97157e00fa8f45df682456a/wrapt-1.17.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = 
"sha256:249f88ed15503f6492a71f01442abddd73856a0032ae860de6d75ca62eed8067", size = 40283, upload-time = "2025-08-12T05:51:39.912Z" }, + { url = "https://files.pythonhosted.org/packages/d1/86/2fcad95994d9b572db57632acb6f900695a648c3e063f2cd344b3f5c5a37/wrapt-1.17.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:5a03a38adec8066d5a37bea22f2ba6bbf39fcdefbe2d91419ab864c3fb515454", size = 40366, upload-time = "2025-08-12T05:52:00.693Z" }, + { url = "https://files.pythonhosted.org/packages/64/0e/f4472f2fdde2d4617975144311f8800ef73677a159be7fe61fa50997d6c0/wrapt-1.17.3-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5d4478d72eb61c36e5b446e375bbc49ed002430d17cdec3cecb36993398e1a9e", size = 108571, upload-time = "2025-08-12T05:52:44.521Z" }, + { url = "https://files.pythonhosted.org/packages/cc/01/9b85a99996b0a97c8a17484684f206cbb6ba73c1ce6890ac668bcf3838fb/wrapt-1.17.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:223db574bb38637e8230eb14b185565023ab624474df94d2af18f1cdb625216f", size = 113094, upload-time = "2025-08-12T05:52:22.618Z" }, + { url = "https://files.pythonhosted.org/packages/25/02/78926c1efddcc7b3aa0bc3d6b33a822f7d898059f7cd9ace8c8318e559ef/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e405adefb53a435f01efa7ccdec012c016b5a1d3f35459990afc39b6be4d5056", size = 110659, upload-time = "2025-08-12T05:52:24.057Z" }, + { url = "https://files.pythonhosted.org/packages/dc/ee/c414501ad518ac3e6fe184753632fe5e5ecacdcf0effc23f31c1e4f7bfcf/wrapt-1.17.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:88547535b787a6c9ce4086917b6e1d291aa8ed914fdd3a838b3539dc95c12804", size = 106946, upload-time = "2025-08-12T05:52:45.976Z" }, + { url = "https://files.pythonhosted.org/packages/be/44/a1bd64b723d13bb151d6cc91b986146a1952385e0392a78567e12149c7b4/wrapt-1.17.3-cp314-cp314t-win32.whl", hash = 
"sha256:41b1d2bc74c2cac6f9074df52b2efbef2b30bdfe5f40cb78f8ca22963bc62977", size = 38717, upload-time = "2025-08-12T05:53:15.214Z" }, + { url = "https://files.pythonhosted.org/packages/79/d9/7cfd5a312760ac4dd8bf0184a6ee9e43c33e47f3dadc303032ce012b8fa3/wrapt-1.17.3-cp314-cp314t-win_amd64.whl", hash = "sha256:73d496de46cd2cdbdbcce4ae4bcdb4afb6a11234a1df9c085249d55166b95116", size = 41334, upload-time = "2025-08-12T05:53:14.178Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/10ad9781128ed2f99dbc474f43283b13fea8ba58723e98844367531c18e9/wrapt-1.17.3-cp314-cp314t-win_arm64.whl", hash = "sha256:f38e60678850c42461d4202739f9bf1e3a737c7ad283638251e79cc49effb6b6", size = 38471, upload-time = "2025-08-12T05:52:57.784Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f6/a933bd70f98e9cf3e08167fc5cd7aaaca49147e48411c0bd5ae701bb2194/wrapt-1.17.3-py3-none-any.whl", hash = "sha256:7171ae35d2c33d326ac19dd8facb1e82e5fd04ef8c6c0e394d7af55a55051c22", size = 23591, upload-time = "2025-08-12T05:53:20.674Z" }, ] [[package]] name = "xformers" -version = "0.0.30" +version = "0.0.32.post1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, - { name = "torch", marker = "(python_full_version >= '3.13' and platform_machine == 'arm64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and platform_machine != 'arm64' and sys_platform == 'linux')" }, + { name = "numpy", marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/bf/f7/dd2269cce89fd1221947dd7cc3a60707ffe721ef55c1803ac3b1a1f7ae5c/xformers-0.0.30.tar.gz", hash = "sha256:a12bf3eb39e294cdbe8a7253ac9b665f41bac61d6d98df174e34ef7bdb6f2fc4", size = 10214139, upload-time = "2025-04-28T20:51:02.045Z" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/33/3b9c4d3d5b2da453d27de891df4ad653ac5795324961aa3a5c15b0353fe6/xformers-0.0.32.post1.tar.gz", hash = "sha256:1de84a45c497c8d92326986508d81f4b0a8c6be4d3d62a29b8ad6048a6ab51e1", size = 12106196, upload-time = "2025-08-14T18:07:45.486Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e6/c6/6f2c364881da54e51a23c17c50db0518d30353bb6da8b1751be9174df538/xformers-0.0.30-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:357875986f50f105f445dc9a002c8450623cd4a6a469865c463285d0376fe77b", size = 31521318, upload-time = "2025-04-28T20:50:41.599Z" }, + { url = "https://files.pythonhosted.org/packages/6b/df/6817346f1a77278315d5fe1fc9f239ba3282ba36e8ab3256babd448dde62/xformers-0.0.32.post1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5f245b5555188da112070d8fefb6b7ae1ae47422856521d66c837e9d2352fbe4", size = 117199943, upload-time = "2025-08-14T18:07:34.78Z" }, ] [[package]] name = "xgrammar" -version = "0.1.19" +version = "0.1.25" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "ninja", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = "pydantic", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = "sentencepiece", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = "tiktoken", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = "torch", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = 
"transformers", marker = "python_full_version >= '3.13' or platform_machine != 'arm64' or sys_platform != 'linux'" }, - { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" }, + { name = "ninja" }, + { name = "numpy" }, + { name = "pydantic" }, + { name = "torch", version = "2.8.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'darwin'" }, + { name = "torch", version = "2.8.0+cu129", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "sys_platform != 'darwin'" }, + { name = "transformers" }, + { name = "triton", version = "3.4.0", source = { registry = "https://download.pytorch.org/whl/cu129" }, marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/55/73e1e4f918ade656c4fa7f3a5fcfb3d521a429fe305d2cb8ca58bfb201d4/xgrammar-0.1.19.tar.gz", hash = "sha256:75bf3e814283b1cbaee9252234c5d4081f0058d29b26d8984f1cdf031c99b775", size = 1714056, upload-time = "2025-05-08T07:13:46.05Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f2/a9/dc3c63cf7f082d183711e46ef34d10d8a135c2319dc581905d79449f52ea/xgrammar-0.1.25.tar.gz", hash = "sha256:70ce16b27e8082f20808ed759b0733304316facc421656f0f30cfce514b5b77a", size = 2297187, upload-time = "2025-09-21T05:58:58.942Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/a8/886975ef77106ba8fad8f7c253d8aead02e1d285a831857f4a67365a7c6e/xgrammar-0.1.19-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:70f1bb54e9bdb92830424713629e37ffcd4f8e4ebbbf03a72860503e25d349bf", size = 504554, upload-time = "2025-05-08T07:13:23.754Z" }, - { url = "https://files.pythonhosted.org/packages/cc/9d/e27686ad71be897cda26289d7f899250f41a3fd8a12b472f1ba3ea8fc5ae/xgrammar-0.1.19-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:70ee7d359386e816eb85f9f763d68a0f2dfedb3da8601ed38e6e8e8391aa9b98", size = 457619, upload-time = "2025-05-08T07:13:25.283Z" }, - { url = "https://files.pythonhosted.org/packages/bc/64/e64c7a06fbbe8d610dd520cb00045c109ad4f56457198220d63830efd426/xgrammar-0.1.19-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16439a86378f7e07d2db91f8a9645d1ff9959b018f1fae6768a057b4b3926dc7", size = 5717888, upload-time = "2025-05-08T07:13:26.85Z" }, - { url = "https://files.pythonhosted.org/packages/c6/68/df91740b23287d06c9d67fadd5d0dc096bb1beaf6079ab083f143545f520/xgrammar-0.1.19-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9beb2cb2b55c9524f24b3cbf8181c47e435586976aa0c37e220661f786c601f", size = 5834560, upload-time = "2025-05-08T07:13:29.006Z" }, - { url = "https://files.pythonhosted.org/packages/df/42/d0248e8af1c69a92c409ee06e6f07fb047567c366e4d08676b6a3bc356f6/xgrammar-0.1.19-cp312-cp312-win_amd64.whl", hash = "sha256:4a430dbf229c04539f0929069df245f5f652298e37dc3f04ce0a6aa8639546ef", size = 527418, upload-time = "2025-05-08T07:13:31.229Z" }, - { url = "https://files.pythonhosted.org/packages/75/80/988ba82581b74ec7638b61897fdb6725d9998ce52c26ea93b98cc0259148/xgrammar-0.1.19-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:057a883ac2f37afe15e045eaad5dad8458bdaa1b69d62f554ff7ac6ca3f4b4a7", size = 457657, upload-time = "2025-05-08T07:13:32.687Z" }, - { url = "https://files.pythonhosted.org/packages/de/1d/46ac48834d0166057612c5eec1bc2e9e69ff16f8de676fb379b8b53dadbd/xgrammar-0.1.19-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f26bbcf8d4f7698c64f4304b99b45dffe4633012d0c91f1c3f687dd08696ef7", size = 5834052, upload-time = "2025-05-08T07:13:34.354Z" }, - { url = "https://files.pythonhosted.org/packages/e2/f5/c6eeba50fd93b03f0c9256e48d0b9f6195d30bb7ce31f5324fc1da8a90d3/xgrammar-0.1.19-cp313-cp313-win_amd64.whl", hash = 
"sha256:6b4bfd84df561b978e4158796adbfa23c80db96e19754483508d4f9003f2f88f", size = 527495, upload-time = "2025-05-08T07:13:35.902Z" }, + { url = "https://files.pythonhosted.org/packages/bf/d7/a7bdb158afa88af7e6e0d312e9677ba5fb5e423932008c9aa2c45af75d5d/xgrammar-0.1.25-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:96500d7578c46e8551253b9211b02e02f54e147bc290479a64717d80dcf4f7e3", size = 678250, upload-time = "2025-09-21T05:58:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/10/9d/b20588a3209d544a3432ebfcf2e3b1a455833ee658149b08c18eef0c6f59/xgrammar-0.1.25-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:73ba9031e359447af53ce89dfb0775e7b9f4b358d513bcc28a6b4deace661dd5", size = 621550, upload-time = "2025-09-21T05:58:39.464Z" }, + { url = "https://files.pythonhosted.org/packages/99/9c/39bb38680be3b6d6aa11b8a46a69fb43e2537d6728710b299fa9fc231ff0/xgrammar-0.1.25-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c519518ebc65f75053123baaf23776a21bda58f64101a64c2fc4aa467c9cd480", size = 8519097, upload-time = "2025-09-21T05:58:40.831Z" }, + { url = "https://files.pythonhosted.org/packages/c6/c2/695797afa9922c30c45aa94e087ad33a9d87843f269461b622a65a39022a/xgrammar-0.1.25-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47fdbfc6007df47de2142613220292023e88e4a570546b39591f053e4d9ec33f", size = 8712184, upload-time = "2025-09-21T05:58:43.142Z" }, + { url = "https://files.pythonhosted.org/packages/e4/7f/aa80d1d4c4632cd3d8d083f1de8b470fcb3df23d9165992a3ced019f1b93/xgrammar-0.1.25-cp312-cp312-win_amd64.whl", hash = "sha256:c9b3defb6b45272e896da401f43b513f5ac12104ec3101bbe4d3a7d02bcf4a27", size = 698264, upload-time = "2025-09-21T05:58:44.787Z" }, + { url = "https://files.pythonhosted.org/packages/b2/5b/4090c73d3e04ff96a1464a8695dc12bde4cc83bb4b9c2f06d2e02a744828/xgrammar-0.1.25-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2d80d4bfe65d1a3334536c804b6471f32e6759f1972c9abe0ae49d5e21462c0b", size = 
621445, upload-time = "2025-09-21T05:58:46.331Z" }, + { url = "https://files.pythonhosted.org/packages/e1/48/df8c52a22f47f1e3237d9457fd6fefe8b9bca75a13a81d1901690260c86b/xgrammar-0.1.25-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a1a6a638167d704a22a0c9670e2176104c38e38c351286a07a77143e22f9053", size = 8710998, upload-time = "2025-09-21T05:58:47.731Z" }, + { url = "https://files.pythonhosted.org/packages/5f/82/e48284c5061550ff682b1096c43146244207c64541cf36fcce88c66a0407/xgrammar-0.1.25-cp313-cp313-win_amd64.whl", hash = "sha256:ffadeba0b704667a7eb6202d409533e9d1e80af15a10add107684e0cde45b8e4", size = 698260, upload-time = "2025-09-21T05:58:49.44Z" }, ] [[package]] @@ -5646,6 +7116,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/ee/518b72faa2073f5aa8e3262408d284892cb79cf2754ba0c3a5870645ef73/xxhash-3.5.0-cp313-cp313-win_arm64.whl", hash = "sha256:4811336f1ce11cac89dcbd18f3a25c527c16311709a89313c3acaf771def2d4b", size = 26801, upload-time = "2024-08-17T09:19:06.547Z" }, ] +[[package]] +name = "yappi" +version = "1.6.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/5b/cfde09baf28f7046194b98f1c4907e172c48e7c1b2db35a918fc8a57727a/yappi-1.6.10.tar.gz", hash = "sha256:463b822727658937bd95a7d80ca9758605b8cd0014e004e9e520ec9cb4db0c92", size = 59379, upload-time = "2024-11-12T11:24:38.351Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/01/1823649d33aee627440939d7247e1fa7ef64bd907ca4ea88438274d392fc/yappi-1.6.10-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32c6d928604d7a236090bc36d324f309fe8344c91123bb84e37c43f6677adddc", size = 32914, upload-time = "2024-11-12T11:23:45.877Z" }, + { url = "https://files.pythonhosted.org/packages/54/c5/85852db160c93ee3190741a4fff25075518ad97dea1e2ad47ca6eab31d2f/yappi-1.6.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:9683c40de7e4ddff225068032cd97a6d928e4beddd9c7cf6515325be8ac28036", size = 77223, upload-time = "2024-11-12T11:23:46.834Z" }, + { url = "https://files.pythonhosted.org/packages/38/01/b03a2bc47fbb2d9bcad072fc2e08730f814defaac2ffbf76ef785fdff5d0/yappi-1.6.10-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:733a212014f2b44673ed62be53b3d4dd458844cd2008ba107f27a3293e42f43a", size = 81250, upload-time = "2024-11-12T11:23:47.872Z" }, + { url = "https://files.pythonhosted.org/packages/39/44/a3c64e0de45a0fc0bf327af95465a94cb8340a64e5abb7bb8af1cfd76f7f/yappi-1.6.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7d80938e566ac6329daa3b036fdf7bd34488010efcf0a65169a44603878daa4e", size = 76118, upload-time = "2024-11-12T11:23:48.829Z" }, + { url = "https://files.pythonhosted.org/packages/0c/68/6806060eaec421a21554c2f7ee8b1379ff02b059e0c753eb55e5b7b701a4/yappi-1.6.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:01705971b728a4f95829b723d08883c7623ec275f4066f4048b28dc0151fe0af", size = 78522, upload-time = "2024-11-12T11:23:49.993Z" }, + { url = "https://files.pythonhosted.org/packages/af/4f/0afcacc683f3c34570effc78e6d4c154dea9d6cc8549c2535fb75441be30/yappi-1.6.10-cp312-cp312-win32.whl", hash = "sha256:8dd13a430b046e2921ddf63d992da97968724b41a03e68292f06a2afa11c9d6e", size = 32020, upload-time = "2024-11-12T11:23:51.51Z" }, + { url = "https://files.pythonhosted.org/packages/cb/45/17f50baed4a886fab2c34a040cefefe6623abcaaadf23f851207da9cd5e6/yappi-1.6.10-cp312-cp312-win_amd64.whl", hash = "sha256:a50eb3aec893c40554f8f811d3341af266d844e7759f7f7abfcdba2744885ea3", size = 34471, upload-time = "2024-11-12T11:23:52.457Z" }, + { url = "https://files.pythonhosted.org/packages/2c/33/9ca066f48c7fb21e0ab16fd5e1c99771275a8cec435ef7ac1840d13252f0/yappi-1.6.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:944df9ebc6b283d6591a6b5f4c586d0eb9c6131c915f1b20fb36127ade83720d", size = 32924, upload-time = 
"2024-11-12T11:23:53.435Z" }, + { url = "https://files.pythonhosted.org/packages/cc/ef/a81fac59ca7a13fd26321d59a54841f70f76ce91b5884c001d77f534b3b1/yappi-1.6.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3736ea6458edbabd96918d88e2963594823e4ab4c58d62a52ef81f6b5839ec19", size = 77308, upload-time = "2024-11-12T11:23:55.393Z" }, + { url = "https://files.pythonhosted.org/packages/62/59/8fdcb2a660388a7778c52cdfa0c52654955cf7953f85efacd8fd771f8da0/yappi-1.6.10-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f27bbc3311a3662231cff395d38061683fac5c538f3bab6796ff05511d2cce43", size = 81347, upload-time = "2024-11-12T11:23:56.926Z" }, + { url = "https://files.pythonhosted.org/packages/f5/28/62d8f97a62eafc443bb057442ae75b7f4741230c2dd774c5b7002bc05a4e/yappi-1.6.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:354cf94d659302b421b13c03487f2f1bce969b97b85fba88afb11f2ef83c35f3", size = 76239, upload-time = "2024-11-12T11:23:57.93Z" }, + { url = "https://files.pythonhosted.org/packages/5d/aa/ea0dbf6e00c7dcb81b4d84d35f6e0584c448674fc19533ddb3198533d41b/yappi-1.6.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1d82839835ae2c291b88fb56d82f80c88c00d76df29f3c1ed050db73b553bef0", size = 78712, upload-time = "2024-11-12T11:23:59.171Z" }, + { url = "https://files.pythonhosted.org/packages/88/72/81acfc73b5d66031284c7b4d384200d016f96e26038466269ed139114e98/yappi-1.6.10-cp313-cp313-win32.whl", hash = "sha256:fc84074575afcc5a2a712e132c0b51541b7434b3099be99f573964ef3b6064a8", size = 32026, upload-time = "2024-11-12T11:24:00.305Z" }, + { url = "https://files.pythonhosted.org/packages/23/71/47f12130412703a6816dba27ebd0aa853612ea6fbe3f93f7698c3520ea92/yappi-1.6.10-cp313-cp313-win_amd64.whl", hash = "sha256:334b31dfefae02bc28b7cd50953aaaae3292e40c15efb613792e4a587281a161", size = 34471, upload-time = "2024-11-12T11:24:01.378Z" }, +] + [[package]] name = 
"yarl" version = "1.20.1" @@ -5713,7 +7205,7 @@ wheels = [ [[package]] name = "zarr" -version = "3.0.8" +version = "3.1.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "donfig" }, @@ -5722,9 +7214,9 @@ dependencies = [ { name = "packaging" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/60/9652fd0536fbaca8d08cbc1a5572c52e0ce01773297df75da8bb47e45907/zarr-3.0.8.tar.gz", hash = "sha256:88505d095af899a88ae8ac4db02f4650ef0801d2ff6f65b6d1f0a45dcf760a6d", size = 256825, upload-time = "2025-05-19T14:19:00.123Z" } +sdist = { url = "https://files.pythonhosted.org/packages/15/a9/29fe1800380092ae03ac6207d757f3e5affaf1fcd2e5ef074cf4fc68f0fa/zarr-3.1.1.tar.gz", hash = "sha256:17db72f37f2489452d2137ac891c4133b8f976f9189d8efd3e75f3b3add84e8c", size = 314075, upload-time = "2025-07-30T11:51:36.81Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/00/3b/e20bdf84088c11f2c396d034506cbffadd53e024111c1aa4585c2aba1523/zarr-3.0.8-py3-none-any.whl", hash = "sha256:7f81e7aec086437d98882aa432209107114bd7f3a9f4958b2af9c6b5928a70a7", size = 205364, upload-time = "2025-05-19T14:18:58.789Z" }, + { url = "https://files.pythonhosted.org/packages/c8/48/bde2f58cfbc9fd6ab844e2f2fd79d5e54195c12a17aa9b47c0b0e701a421/zarr-3.1.1-py3-none-any.whl", hash = "sha256:9a0b7e7c27bf62965b8eef6b8b8fdb9b47381f0738be35e40f37be6479b546be", size = 255373, upload-time = "2025-07-30T11:51:34.623Z" }, ] [[package]]