diff --git a/.gitmodules b/.gitmodules index 307a59361d..4c94abbb10 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,12 +1,12 @@ -[submodule "3rdparty/NeMo"] - path = 3rdparty/NeMo-workspace/NeMo - url = https://github.com/NVIDIA/NeMo.git - branch = pjin/ashors/rl-qwen3-export - shallow = true [submodule "3rdparty/Megatron-LM"] path = 3rdparty/Megatron-LM-workspace/Megatron-LM url = https://github.com/terrykong/Megatron-LM.git - branch = sahilj/megatron-external-loss-norm + branch = yuya/nemo-rl-use + shallow = true +[submodule "3rdparty/Megatron-Bridge"] + path = 3rdparty/Megatron-Bridge-workspace/Megatron-Bridge + url = https://github.com/NVIDIA-NeMo/Megatron-Bridge.git + branch = yuya/nemo-rl-use-chunkpatch shallow = true [submodule "3rdparty/Automodel-workspace/Automodel"] path = 3rdparty/Automodel-workspace/Automodel diff --git a/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge new file mode 160000 index 0000000000..a1bbfc2429 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/Megatron-Bridge @@ -0,0 +1 @@ +Subproject commit a1bbfc2429a23786a0a288ac55437fc931c567bd diff --git a/nemo_rl/models/megatron/converters/__init__.py b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py similarity index 70% rename from nemo_rl/models/megatron/converters/__init__.py rename to 3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py index 3563de4959..b2ae4cf651 100644 --- a/nemo_rl/models/megatron/converters/__init__.py +++ b/3rdparty/Megatron-Bridge-workspace/is_megatron_bridge_installed.py @@ -11,17 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+try: + from megatron.bridge import AutoBridge # noqa: F401 -from .common import ( - get_global_expert_num, - get_global_layer_num, - get_local_expert_num, - get_local_layer_num, -) + INSTALLED = True +except Exception: + INSTALLED = False -__all__ = [ - "get_global_expert_num", - "get_global_layer_num", - "get_local_expert_num", - "get_local_layer_num", -] +print(f"Megatron Bridge {INSTALLED=}") diff --git a/3rdparty/Megatron-Bridge-workspace/pyproject.toml b/3rdparty/Megatron-Bridge-workspace/pyproject.toml new file mode 100644 index 0000000000..b76ae67595 --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/pyproject.toml @@ -0,0 +1,14 @@ +[build-system] +requires = [ + "setuptools>=61.0", + "wheel", +] +build-backend = "setuptools.build_meta" + +[project] +name = "megatron-bridge" +dynamic = ["dependencies", "version"] +authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] +description = "Standalone packaging for the Megatron Bridge sub-module." +requires-python = ">=3.10" + diff --git a/3rdparty/Megatron-Bridge-workspace/setup.py b/3rdparty/Megatron-Bridge-workspace/setup.py new file mode 100644 index 0000000000..d12fa2d8cb --- /dev/null +++ b/3rdparty/Megatron-Bridge-workspace/setup.py @@ -0,0 +1,115 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import os +import sys +import tomllib + +import setuptools + +# Conditional packaging mirroring NeMo and Megatron-LM workspaces +final_packages = [] +final_package_dir = {} + +# If the submodule is present, expose `megatron.bridge` package from the checkout +bridge_src_dir = "Megatron-Bridge/src/megatron/bridge" +bridge_package_name = "megatron.bridge" + +CACHED_DEPENDENCIES = [ + "accelerate>=1.6.0", + "datasets", + "numpy<2", + "omegaconf>=2.3.0", + "packaging", + "tensorboard>=2.19.0", + "torch", + "transformers>=4.51.3", + "typing-extensions", + "rich", + "wandb>=0.19.10", + "six>=1.17.0", + "regex>=2024.11.6", + "pyyaml>=6.0.2", + "einops>=0.8.1", + "sentencepiece>=0.2.0", + "tiktoken>=0.9.0", + "tqdm>=4.67.1", + "hydra-core>1.3,<=1.3.2", + "megatron-core>=0.14.0a0,<0.15.0", + "nvidia-modelopt[torch,onnx]>=0.33.0a0,<0.34.0; sys_platform != 'darwin'", + "nvidia-resiliency-ext>=0.4.0a0,<0.5.0; sys_platform != 'darwin'", + "transformer-engine[pytorch]>=2.5.0a0,<2.6.0; sys_platform != 'darwin'", +] + +# If the bridge source exists, compare cached dependencies with the submodule's pyproject +if os.path.exists(bridge_src_dir): + pyproject_path = os.path.join("Megatron-Bridge", "pyproject.toml") + if not os.path.exists(pyproject_path): + raise FileNotFoundError( + f"[megatron-bridge][setup] {pyproject_path} not found; cannot run the dependency consistency check." 
+ ) + + with open(pyproject_path, "rb") as f: + data = tomllib.load(f) + project = data["project"] + deps_list = project["dependencies"] + submodule_deps = set(str(d).strip() for d in deps_list) + + missing_in_cached = submodule_deps - set(CACHED_DEPENDENCIES) + extra_in_cached = set(CACHED_DEPENDENCIES) - submodule_deps + + if missing_in_cached or extra_in_cached: + print( + "[megatron-bridge][setup] Dependency mismatch between Megatron-Bridge-workspace/Megatron-Bridge/pyproject.toml vs Megatron-Bridge-workspace/setup.py::CACHED_DEPENDENCIES.", + file=sys.stderr, + ) + if missing_in_cached: + print( + " - Present in Megatron-Bridge/pyproject.toml but missing from CACHED_DEPENDENCIES:", + file=sys.stderr, + ) + for dep in sorted(missing_in_cached): + print(f" * {dep}", file=sys.stderr) + if extra_in_cached: + print( + " - Present in CACHED_DEPENDENCIES but not in Megatron-Bridge/pyproject.toml:", + file=sys.stderr, + ) + for dep in sorted(extra_in_cached): + print(f" * {dep}", file=sys.stderr) + print( + " Please update CACHED_DEPENDENCIES or the submodule pyproject to keep them in sync.", + file=sys.stderr, + ) + sys.exit(1) + else: + print( + "[megatron-bridge][setup] Dependency sets are consistent with the submodule pyproject.", + file=sys.stderr, + ) + +if os.path.exists(bridge_src_dir): + final_packages.append(bridge_package_name) + final_package_dir[bridge_package_name] = bridge_src_dir + +setuptools.setup( + name="megatron-bridge", + version="0.0.0", + description="Standalone packaging for the Megatron Bridge sub-module.", + author="NVIDIA", + author_email="nemo-toolkit@nvidia.com", + packages=final_packages, + package_dir=final_package_dir, + py_modules=["is_megatron_bridge_installed"], + install_requires=CACHED_DEPENDENCIES, +) diff --git a/3rdparty/Megatron-LM-workspace/Megatron-LM b/3rdparty/Megatron-LM-workspace/Megatron-LM index 2ff0f099ff..e2d5bcd605 160000 --- a/3rdparty/Megatron-LM-workspace/Megatron-LM +++ 
b/3rdparty/Megatron-LM-workspace/Megatron-LM @@ -1 +1 @@ -Subproject commit 2ff0f099ffc30ffd152e3e29e921a1609d00855c +Subproject commit e2d5bcd605108e2cf64fdb91fdfc669f10a57f56 diff --git a/3rdparty/NeMo-workspace/NeMo b/3rdparty/NeMo-workspace/NeMo deleted file mode 160000 index 5c42641e34..0000000000 --- a/3rdparty/NeMo-workspace/NeMo +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 5c42641e344a487c7ca5b253a7483f0af8ef40e6 diff --git a/3rdparty/NeMo-workspace/is_nemo_installed.py b/3rdparty/NeMo-workspace/is_nemo_installed.py deleted file mode 100644 index 4eeadc0006..0000000000 --- a/3rdparty/NeMo-workspace/is_nemo_installed.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import contextlib -import io - -try: - with ( - contextlib.redirect_stdout(io.StringIO()), - contextlib.redirect_stderr(io.StringIO()), - ): - # Silence the logging because NeMo is very verbose - from nemo.tron.init import initialize_megatron # noqa: F401 - INSTALLED = True -except ImportError: - INSTALLED = False -print(f"NeMo {INSTALLED=}") diff --git a/3rdparty/NeMo-workspace/pyproject.toml b/3rdparty/NeMo-workspace/pyproject.toml deleted file mode 100644 index 3eb6af1c86..0000000000 --- a/3rdparty/NeMo-workspace/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel"] - -[project] -name = "nemo-tron" -dynamic = ["dependencies", "version"] -authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -description = "Standalone packaging for the NeMo Tron sub-module." -requires-python = ">=3.10" -# Dependencies will be managed in setup.py diff --git a/3rdparty/NeMo-workspace/setup.py b/3rdparty/NeMo-workspace/setup.py deleted file mode 100644 index 6bc940202c..0000000000 --- a/3rdparty/NeMo-workspace/setup.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import os - -import setuptools - -# --- Configuration Start --- -final_packages = [] -final_package_dir = {} - -# --- nemo package conditional section --- -nemo_package_source_dir = "NeMo/nemo" -nemo_package_name = "nemo" - -if os.path.exists(nemo_package_source_dir): - final_packages.append(nemo_package_name) - final_package_dir[nemo_package_name] = nemo_package_source_dir -# --- End of nemo package conditional section --- - -setuptools.setup( - name="nemo-tron", # Must match [project].name in pyproject.toml - version="0.0.0", # Must match [project].version in pyproject.toml - description="Standalone packaging for the NeMo Tron sub-module.", # Can be sourced from pyproject.toml too - author="NVIDIA", - author_email="nemo-toolkit@nvidia.com", - packages=final_packages, - package_dir=final_package_dir, - py_modules=["is_nemo_installed"], - install_requires=[ - "lightning", - "wget", - "onnx", - "fiddle", - "cloudpickle", - "braceexpand", - "webdataset", - "h5py", - "ijson", - "matplotlib", - "scikit-learn", - "nemo-run", - "hatchling", - ], -) diff --git a/nemo_rl/models/megatron/common.py b/nemo_rl/models/megatron/common.py index 6ae03a64a0..c6efee6e93 100644 --- a/nemo_rl/models/megatron/common.py +++ b/nemo_rl/models/megatron/common.py @@ -16,6 +16,7 @@ import torch import torch.distributed as dist +from megatron.bridge.training.state import GlobalState from megatron.core.models.gpt import GPTModel from megatron.core.packed_seq_params import PackedSeqParams from megatron.core.parallel_state import ( @@ -26,7 +27,6 @@ get_tensor_model_parallel_rank, ) from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron.state import GlobalState from nemo_rl.algorithms.loss_functions import LossFunction, SequencePackingLossWrapper from nemo_rl.distributed.batched_data_dict import BatchedDataDict diff --git a/nemo_rl/models/megatron/community_import.py b/nemo_rl/models/megatron/community_import.py index 6f8f0b08e2..c84d3fc145 100644 --- 
a/nemo_rl/models/megatron/community_import.py +++ b/nemo_rl/models/megatron/community_import.py @@ -13,50 +13,54 @@ # limitations under the License. import os +from typing import Optional -from transformers import AutoConfig +from megatron.bridge import AutoBridge +from nemo_rl.models.policy import MegatronConfig -def import_model_from_hf_name(hf_model_name: str, output_path: str): - hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) - if hf_config.model_type == "llama": - from nemo.tron.converter.llama import HFLlamaImporter - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFLlamaImporter( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type == "qwen2": - from nemo.tron.converter.qwen import HFQwen2Importer - - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFQwen2Importer( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type in ("qwen3", "qwen3_moe"): - from nemo.tron.converter.qwen import HFQwen3Importer - - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFQwen3Importer( - hf_model_name, - output_path=output_path, - ) - elif hf_config.model_type in ("deepseek_v2", "deepseek_v3"): - from nemo.tron.converter.deepseek import HFDeepSeekImporter +def import_model_from_hf_name( + hf_model_name: str, + output_path: str, + megatron_config: Optional[MegatronConfig] = None, +): + """Import a Hugging Face model into Megatron checkpoint format and save the Megatron checkpoint to the output path. + + Args: + hf_model_name: Hugging Face model ID or local path (e.g., 'meta-llama/Llama-3.1-8B-Instruct'). + output_path: Directory to write the Megatron checkpoint (e.g., /tmp/megatron_ckpt). + megatron_config: Optional megatron config with parallelism settings for distributed megatron model import. 
+ """ + bridge = AutoBridge.from_hf_pretrained(hf_model_name, trust_remote_code=True) + + model_provider = bridge.to_megatron_provider(load_weights=True) + + if megatron_config is not None: + model_provider.tensor_model_parallel_size = megatron_config[ + "tensor_model_parallel_size" + ] + model_provider.pipeline_model_parallel_size = megatron_config[ + "pipeline_model_parallel_size" + ] + model_provider.expert_model_parallel_size = megatron_config[ + "expert_model_parallel_size" + ] + model_provider.expert_tensor_parallel_size = megatron_config[ + "expert_tensor_parallel_size" + ] + model_provider.num_layers_in_first_pipeline_stage = megatron_config[ + "num_layers_in_first_pipeline_stage" + ] + model_provider.num_layers_in_last_pipeline_stage = megatron_config[ + "num_layers_in_last_pipeline_stage" + ] + model_provider.pipeline_dtype = megatron_config["pipeline_dtype"] + model_provider.initialize_model_parallel(seed=0) + megatron_model = model_provider.provide_distributed_model(wrap_with_ddp=False) + + bridge.save_megatron_model(megatron_model, output_path) - print(f"Importing model {hf_model_name} to {output_path}...") - importer = HFDeepSeekImporter( - hf_model_name, - output_path=output_path, - ) - else: - raise ValueError( - f"Unknown model type: {hf_config.model_type}. Currently, DeepSeek, Qwen and Llama are supported. " - "If you'd like to run with a different model, please raise an issue or consider adding your own converter." - ) - importer.apply() # resetting mcore state import megatron.core.rerun_state_machine @@ -75,33 +79,10 @@ def export_model_from_megatron( f"HF checkpoint already exists at {output_path}. Delete it to run or set overwrite=True." 
) - hf_config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) - - if hf_config.model_type == "llama": - from nemo.tron.converter.llama import HFLlamaExporter - - exporter_cls = HFLlamaExporter - elif hf_config.model_type == "qwen2": - from nemo.tron.converter.qwen import HFQwen2Exporter + bridge = AutoBridge.from_hf_pretrained(hf_model_name, trust_remote_code=True) + megatron_model = bridge.load_megatron_model(input_path) + bridge.save_hf_pretrained(megatron_model, output_path) - exporter_cls = HFQwen2Exporter - - elif hf_config.model_type in ("qwen3", "qwen3_moe"): - from nemo.tron.converter.qwen import HFQwen3Exporter - - exporter_cls = HFQwen3Exporter - else: - raise ValueError( - f"Unknown model: {hf_model_name}. Currently, only Qwen2, Qwen3 and Llama are supported. " - "If you'd like to run with a different model, please raise an issue or consider adding your own converter." - ) - print(f"Exporting model {hf_model_name} to {output_path}...") - exporter = exporter_cls( - input_path=input_path, - output_path=output_path, - hf_tokenizer_path=hf_tokenizer_path, - ) - exporter.apply() # resetting mcore state import megatron.core.rerun_state_machine diff --git a/nemo_rl/models/megatron/converters/common.py b/nemo_rl/models/megatron/converters/common.py deleted file mode 100644 index 92a4177608..0000000000 --- a/nemo_rl/models/megatron/converters/common.py +++ /dev/null @@ -1,523 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -import re -from collections import defaultdict -from typing import Any - -import einops -import numpy as np -import torch -from megatron.core import parallel_state -from nemo.lightning.io.state import ( - StateDictTransform, - TransformCTX, - _match_keys, - _ModelState, -) -from transformers import AutoConfig, AutoModelForCausalLM -from transformers.integrations.accelerate import init_empty_weights - -import nemo_rl.models.megatron.converters.deepseek as deepseek_converter -import nemo_rl.models.megatron.converters.llama as llama_converter -import nemo_rl.models.megatron.converters.qwen2 as qwen2_converter -import nemo_rl.models.megatron.converters.qwen3 as qwen3_converter - -_GROUP_TO_RANKS_CACHE = {} - - -def get_local_layer_num(s): - """Assumes layer number is preceeded by 'layers.'.""" - segments = s.split(".") - number = None - for i, segment in enumerate(segments): - if segment == "layers": - if segments[i + 1].isdigit(): - number = int(segments[i + 1]) - break - return number - - -def get_local_expert_num(s): - """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'.""" - segments = s.split(".") - if "experts" not in segments or segments[-1] == "_extra_state": - return None - number = int(segments[-1].strip("weight")) - return number - - -def get_global_layer_num(s, cfg) -> int: - """Assumes layer number is preceeded by 'layers.'. - - Assumes pipeline model parallel size is set. - In the state dict, the layer number is the local layer number (PP local). - This function converts the local layer number to the global layer number. 
- """ - local_layer_num = get_local_layer_num(s) - assert local_layer_num is not None, f"Local layer number is None for {s}" - pp_rank = parallel_state.get_pipeline_model_parallel_rank() - pp_size = parallel_state.get_pipeline_model_parallel_world_size() - - first_stage_layers = cfg.num_layers_in_first_pipeline_stage - last_stage_layers = cfg.num_layers_in_last_pipeline_stage - - if first_stage_layers is None and last_stage_layers is None: - first_stage_layers = last_stage_layers = cfg.num_layers // pp_size - elif first_stage_layers is None: - first_stage_layers = (cfg.num_layers - last_stage_layers) // (pp_size - 1) - elif last_stage_layers is None: - last_stage_layers = (cfg.num_layers - first_stage_layers) // (pp_size - 1) - - # Calculate global offset based on rank - if pp_rank == 0: - global_offset = 0 - elif pp_rank == pp_size - 1: - global_offset = cfg.num_layers - last_stage_layers - else: - middle_layers = cfg.num_layers - first_stage_layers - last_stage_layers - layers_per_middle_stage = middle_layers // (pp_size - 2) - global_offset = first_stage_layers + (pp_rank - 1) * layers_per_middle_stage - - return global_offset + local_layer_num - - -def get_global_expert_num(s, cfg): - """Assumes experts have 'experts.' in their name. Expert num succeeds '.weight'. - - Assumes expert model parallel size is set. - In the state dict, the expert number is the local expert number (expert local). - This function converts the local expert number to the global expert number. 
- """ - local_expert_num = get_local_expert_num(s) - global_expert_num = ( - parallel_state.get_expert_model_parallel_rank() - * cfg.num_moe_experts - // parallel_state.get_expert_model_parallel_world_size() - + local_expert_num - ) - return global_expert_num - - -def get_global_key_from_local_key(local_key, model_cfg): - local_layer = get_local_layer_num(local_key) - if local_layer is not None: - global_layer = get_global_layer_num(local_key, model_cfg) - # Replace the first occurrence of the digits after "layers." with the global layer number. - global_key = re.sub(r"(?<=layers\.)\d+", str(global_layer), local_key, count=1) - else: - global_key = local_key - local_expert = get_local_expert_num(global_key) - if local_expert is not None: - global_expert = get_global_expert_num(global_key, model_cfg) - # Replace the last occurrence of the digits after "weight" with the global expert number. - global_key = re.sub(r"(?<=weight)\d+", str(global_expert), global_key) - return global_key - - -def split_fc1_tp(ctx: TransformCTX, linear_fc1: torch.Tensor): - # gate proj and up proj are mixed right now, and we need to reshape them - # [ gate_tp0 ] [ gate_tp0 ] - # [ up_tp0 ] --\ [ gate_tp1 ] --\ (split gate) - # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) - # [ up_tp1 ] [ up_tp1 ] - megatron_config = ctx.source.config - tp = megatron_config.tensor_model_parallel_size - linear_fc1 = einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=tp) - mlp_gate_proj_weight = linear_fc1[0] - mlp_up_proj_weight = linear_fc1[1] - return mlp_gate_proj_weight, mlp_up_proj_weight - - -def split_fc1_etp(ctx: TransformCTX, linear_fc1: torch.Tensor): - # gate proj and up proj are mixed right now, and we need to reshape them - # [ gate_tp0 ] [ gate_tp0 ] - # [ up_tp0 ] --\ [ gate_tp1 ] --\ (split gate) - # [ gate_tp1 ] --/ [ up_tp0 ] --/ (split up) - # [ up_tp1 ] [ up_tp1 ] - megatron_config = ctx.source.config - etp = megatron_config.expert_tensor_parallel_size - linear_fc1 = 
einops.rearrange(linear_fc1, "(t c d) a1 -> c (t d) a1", c=2, t=etp) - mlp_gate_proj_weight = linear_fc1[0] - mlp_up_proj_weight = linear_fc1[1] - return mlp_gate_proj_weight, mlp_up_proj_weight - - -def split_qkv_gpu(ctx: TransformCTX, linear_qkv: torch.Tensor): - """Split interleave-concatenated qkv to q, k, v. - - Example: export layer linear_qkv to HF {q|k|v}_proj - """ - megatron_config = ctx.source.config - - head_num = megatron_config.num_attention_heads - num_query_groups = megatron_config.num_query_groups - heads_per_group = head_num // num_query_groups - # hidden_size = megatron_config.hidden_size - head_size = megatron_config.kv_channels - qkv_total_dim = head_num + 2 * num_query_groups - - linear_qkv = linear_qkv.reshape([qkv_total_dim, head_size, -1]) - # when converting base model (linear_qkv), hidden size = megatron_config.hidden_size - # when converting lora (linear_qkv.adapter.linear_out), hidden size = lora_r - hidden_size = linear_qkv.size(-1) - q_slice = torch.cat( - [ - torch.arange( - (heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group - ) - for i in range(num_query_groups) - ] - ) - k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) - v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) - - q_proj = linear_qkv[q_slice].reshape(-1, hidden_size) - k_proj = linear_qkv[k_slice].reshape(-1, hidden_size) - v_proj = linear_qkv[v_slice].reshape(-1, hidden_size) - - return q_proj, k_proj, v_proj - - -def split_qkv_bias_gpu(ctx: TransformCTX, qkv_bias: torch.Tensor): - """Split interleave-concatenated qkv bias to separate q, k, v bias. 
- - Example: export layer linear_qkv bias to HF {q|k|v}_proj bias - """ - megatron_config = ctx.source.config - - head_num = megatron_config.num_attention_heads - num_query_groups = megatron_config.num_query_groups - heads_per_group = head_num // num_query_groups - head_size = megatron_config.kv_channels - qkv_total_dim = head_num + 2 * num_query_groups - - qkv_bias = qkv_bias.reshape([qkv_total_dim, head_size]) - q_slice = torch.cat( - [ - torch.arange( - (heads_per_group + 2) * i, (heads_per_group + 2) * i + heads_per_group - ) - for i in range(num_query_groups) - ] - ) - k_slice = torch.arange(heads_per_group, qkv_total_dim, (heads_per_group + 2)) - v_slice = torch.arange(heads_per_group + 1, qkv_total_dim, (heads_per_group + 2)) - - q_bias = qkv_bias[q_slice].reshape(-1) - k_bias = qkv_bias[k_slice].reshape(-1) - v_bias = qkv_bias[v_slice].reshape(-1) - - return q_bias, k_bias, v_bias - - -def update_transforms_for_nemorl(export_transforms): - # In place update - for transform in export_transforms: - if transform.transform.__name__ == "split_fc1": - if ( - "experts" in transform.source_key - and "shared_experts" not in transform.source_key - ): - transform.transform = split_fc1_etp - else: - transform.transform = split_fc1_tp - elif transform.transform.__name__ == "split_qkv": - # This transform previously moved qkv weights to cpu - transform.transform = split_qkv_gpu - elif transform.transform.__name__ == "split_qkv_bias": - # This transform previously moved qkv weights to cpu - transform.transform = split_qkv_bias_gpu - return export_transforms - - -class MegatronToHFConverter: - def __init__(self, hf_model_name, megatron_model): - # We only care about the state_dict keys and the config, so we - # don't need to load the model weights - config = AutoConfig.from_pretrained(hf_model_name, trust_remote_code=True) - with init_empty_weights(): - self.target_model = AutoModelForCausalLM.from_config( - config, trust_remote_code=True - ) - - local_keys = 
list(megatron_model.state_dict().keys()) - global_keys = [ - get_global_key_from_local_key(k, megatron_model.config) for k in local_keys - ] - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_gathered_global_keys = [None] * pp_world_size - torch.distributed.all_gather_object( - pp_gathered_global_keys, global_keys, group=pp_group - ) - pp_gathered_global_keys = list({k for l in pp_gathered_global_keys for k in l}) # type: ignore - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = parallel_state.get_expert_model_parallel_world_size() - ep_gathered_global_keys = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_global_keys, pp_gathered_global_keys, group=ep_group - ) - ep_gathered_global_keys = list({k for l in ep_gathered_global_keys for k in l}) - - global_keys = ep_gathered_global_keys - global_keys_map = {k: None for k in global_keys} - - if config.model_type == "qwen2": - self.export_mapping = qwen2_converter.get_export_mapping(megatron_model) - self.export_transforms = qwen2_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type in ("qwen3", "qwen3_moe"): - self.export_mapping = qwen3_converter.get_export_mapping(config) - self.export_transforms = qwen3_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type == "llama": - self.export_mapping = llama_converter.get_export_mapping() - self.export_transforms = llama_converter.get_export_transforms(config) - - def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, 
Any] - ) -> _ModelState: - return _ModelState(source_state_dict) - - self.get_source_fn = get_source_fn - elif config.model_type in ("deepseek_v2", "deepseek_v3"): - self.export_mapping = deepseek_converter.get_export_mapping( - source=global_keys_map, - source_config=megatron_model.config.__dict__, - ) - self.export_transforms = deepseek_converter.get_export_transforms() - self.get_source_fn = deepseek_converter.get_source_fn - else: - raise ValueError( - f"No converter mapping and transforms found for {hf_model_name} with model_type {config.model_type}" - ) - - self.export_transforms = update_transforms_for_nemorl(self.export_transforms) - - updated_global_keys_map = self.get_source_fn( - global_keys_map, megatron_model.config.__dict__ - ).state_dict() - - # Set the value of the state_dict to the megatron key name so that - # StateDictTransform will set the value of the target state dict to - # the megatron key name - dummy_source = _ModelState({k: k for k in updated_global_keys_map.keys()}) - - ctx = TransformCTX( - source=dummy_source, - source_state=dummy_source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(), - ) - for key, val in self.export_mapping.items(): - ctx = StateDictTransform(key, val)(ctx) - - for transform in self.export_transforms: - if type(transform.target_key) == tuple: - for t in transform.target_key: - ctx = StateDictTransform(transform.source_key, t)(ctx) - else: - ctx = StateDictTransform(transform.source_key, transform.target_key)( - ctx - ) - - hf_keys_to_megatron_keys = ctx.target_state - megatron_keys_to_hf_keys = defaultdict(set) - for hf_key, megatron_key in hf_keys_to_megatron_keys.items(): - if isinstance(megatron_key, list): - for k in megatron_key: - megatron_keys_to_hf_keys[k].add(hf_key) - else: - megatron_keys_to_hf_keys[megatron_key].add(hf_key) - self.megatron_keys_to_hf_keys = dict(megatron_keys_to_hf_keys) - - def _get_empty_state_dict(self, source_keys=None): - if source_keys is None: 
- # If source_keys is None, then we use all the target model keys - target_keys = self.target_model.state_dict().keys() - else: - # Otherwise, we only use the target keys corresponding to the source_keys - target_keys = set() - for k in source_keys: - target_keys = target_keys.union(self.megatron_keys_to_hf_keys[k]) - - state_dict = {k: None for k in target_keys} - return state_dict - - def _group( - self, - state_dict, - key, - item, - main_state_dict_keys, - main_items, - exception_state_dict_keys_list, - exception_items, - ): - source_matches = _match_keys(list(state_dict.keys()), key) - if source_matches.size == 1 and source_matches == np.array(None): - # no match, don't include these keys - return - elif source_matches.ndim == 1: - # normal case - main_state_dict_keys.extend(source_matches) - main_items.append(item) - elif source_matches.ndim == 2: - for source_match in source_matches: - if None in source_match: - # partial wildcard match case (e.g. an MoE layer with missing experts in this batch) - non_none_sources = [s for s in source_match if s is not None] - exception_state_dict_keys_list.append(non_none_sources) - exception_items.append(item) - else: - # normal case - main_state_dict_keys.extend(source_match) - main_items.append(item) - else: - raise NotImplementedError( - f"source_matches.ndim = {source_matches.ndim}. Expressions with more than 2 wildcard expressions are not supported." - ) - - def _get_groups(self, state_dict): - """This function is used to group mappings and transforms together. - - Goes through the mappings and transforms once to collect mapping and transform groups - [(mapping, state_dict_keys)], [(transforms, state_dict_keys)] that can be converted - together. - - This is necessary because: - 1. If the mapping or transform expression has 2 wildcard expressions, - _match_keys assumes the matches for each wildcard are the same size. 
For example, - if the mapping is "layers.*.mlp.experts.*.linear_fc1.weight", where the first wildcard - matches the layer number and the second wildcard matches the expert number, it assumes - the number of experts is the same for each layer. This will fail in the case we're doing - batched streaming refit and the current state dict is missing experts from some layers. - To handle this, we separate out the partial keys (e.g. the ones corresponding to less experts) - in a separate group and run them through the mapping and transforms separately. - - NOTE: this function currently only handles expressions with up to 2 wildcard expressions - and will fail if the mapping or transform expression has more than 2 wildcard expressions. - - 2. An expression matches 0 keys in the current state dict. This can happen during batched - streaming refit if the current state dict doesn't have any keys that match the expression. - To handle this, we skip these mapping/transforms. - - """ - # Most of the keys will be able to converted together (main) - # For the keys that can't be converted together (exception), we need to handle them separately - main_state_dict_keys: list[str] = [] - exception_mappings_state_dict_keys_list: list[list[str]] = [] - exception_transforms_state_dict_keys_list: list[list[str]] = [] - - main_mappings: list[tuple[str, Any]] = [] - exception_mappings: list[tuple[str, Any]] = [] - for key, val in self.export_mapping.items(): - self._group( - state_dict, - key, - (key, val), - main_state_dict_keys, - main_mappings, - exception_mappings_state_dict_keys_list, - exception_mappings, - ) - - main_transforms = [] - exception_transforms = [] - for transform in self.export_transforms: - if type(transform.source_key) == tuple: - source_keys = transform.source_key - else: - source_keys = (transform.source_key,) - for source_key in source_keys: - self._group( - state_dict, - source_key, - transform, - main_state_dict_keys, - main_transforms, - 
exception_transforms_state_dict_keys_list, - exception_transforms, - ) - - mapping_groups = [({k: v for k, v in main_mappings}, main_state_dict_keys)] - for (k, v), exception_state_dict_keys in zip( - exception_mappings, exception_mappings_state_dict_keys_list - ): - mapping_groups.append(({k: v}, exception_state_dict_keys)) - transform_groups = [(main_transforms, main_state_dict_keys)] - for exception_transform, exception_state_dict_keys in zip( - exception_transforms, exception_transforms_state_dict_keys_list - ): - transform_groups.append(([exception_transform], exception_state_dict_keys)) - - return mapping_groups, transform_groups - - def convert(self, state_dict, megatron_config): - state_dict = self.get_source_fn( - state_dict, megatron_config.__dict__ - ).state_dict() - - mapping_groups, transform_groups = self._get_groups(state_dict) - - converted_state_dict = {} - for mapping, state_dict_keys in mapping_groups: - source = _ModelState({k: state_dict[k] for k in state_dict_keys}) - source.config = megatron_config - ctx = TransformCTX( - source=source, - source_state=source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(list(state_dict_keys)), - ) - - for key, val in mapping.items(): - ctx = StateDictTransform(key, val)(ctx) - - for k, v in ctx.target_state.items(): - if v is not None: - converted_state_dict[k] = v - - for transforms, state_dict_keys in transform_groups: - source = _ModelState({k: state_dict[k] for k in state_dict_keys}) - source.config = megatron_config - ctx = TransformCTX( - source=source, - source_state=source.state_dict(), - target=self.target_model, - target_state=self._get_empty_state_dict(list(state_dict_keys)), - ) - for transform in transforms: - ctx = transform(ctx) - - for k, v in ctx.target_state.items(): - if v is not None: - converted_state_dict[k] = v - - return converted_state_dict diff --git a/nemo_rl/models/megatron/converters/deepseek.py b/nemo_rl/models/megatron/converters/deepseek.py 
deleted file mode 100644 index 512e4a8dbe..0000000000 --- a/nemo_rl/models/megatron/converters/deepseek.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from typing import Any - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns, _ModelState - - -def get_export_mapping(source, source_config): - mapping = { - # Embed - "embedding.word_embeddings.weight": "model.embed_tokens.weight", - # Attention - "decoder.layers.*.input_layernorm.weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.self_attention.linear_q_down_proj.weight": "model.layers.*.self_attn.q_a_proj.weight", - "decoder.layers.*.self_attention.linear_q_up_proj.weight": "model.layers.*.self_attn.q_b_proj.weight", - "decoder.layers.*.self_attention.linear_kv_down_proj.weight": "model.layers.*.self_attn.kv_a_proj_with_mqa.weight", - "decoder.layers.*.self_attention.linear_kv_up_proj.weight": "model.layers.*.self_attn.kv_b_proj.weight", - "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight": "model.layers.*.self_attn.q_a_layernorm.weight", - "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight": "model.layers.*.self_attn.kv_a_layernorm.weight", - "decoder.layers.*.pre_mlp_layernorm.weight": "model.layers.*.post_attention_layernorm.weight", - 
# Dense MLP - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - # MoE - "decoder.layers.*.mlp.router.weight": "model.layers.*.mlp.gate.weight", - "decoder.layers.*.mlp.experts.linear_fc2.weight*": "model.layers.*.mlp.experts.*.down_proj.weight", - "decoder.layers.*.mlp.shared_experts.linear_fc2.weight": "model.layers.*.mlp.shared_experts.down_proj.weight", - # LM Head - "decoder.final_layernorm.weight": "model.norm.weight", - "output_layer.weight": "lm_head.weight", - } - # For lite model - if source_config["q_lora_rank"] is None: - del mapping["decoder.layers.*.self_attention.linear_q_down_proj.weight"] - del mapping["decoder.layers.*.self_attention.linear_q_up_proj.weight"] - mapping["decoder.layers.*.self_attention.linear_q_proj.weight"] = ( - "model.layers.*.self_attn.q_proj.weight" - ) - # Account for Mcore local spec - if ( - source_config["q_lora_rank"] is not None - and "decoder.layers.0.self_attention.q_layernorm.weight" in source - ): - mapping["decoder.layers.*.self_attention.q_layernorm.weight"] = mapping.pop( - "decoder.layers.*.self_attention.linear_q_up_proj.layer_norm_weight" - ) - - if "decoder.layers.0.self_attention.kv_layernorm.weight" in source: - mapping["decoder.layers.*.self_attention.kv_layernorm.weight"] = mapping.pop( - "decoder.layers.*.self_attention.linear_kv_up_proj.layer_norm_weight" - ) - - if source_config.get("moe_router_enable_expert_bias", False): - mapping.update( - { - "decoder.layers.*.mlp.router.expert_bias": "model.layers.*.mlp.gate.e_score_correction_bias", - } - ) - return mapping - - -def get_export_transforms(): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.experts.linear_fc1.weight*", - target_key=( - "model.layers.*.mlp.experts.*.gate_proj.weight", - 
"model.layers.*.mlp.experts.*.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.shared_experts.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.shared_experts.gate_proj.weight", - "model.layers.*.mlp.shared_experts.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - ] - return transforms - - -def get_source_fn( - source_state_dict: dict[str, Any], source_config: dict[str, Any] -) -> _ModelState: - """Modify source state_dict before conversion. - - In deepseek, HF weight `model.layers.*.post_attention_layernorm.weight` is mapped to mcore weight - a) `decoder.layers.*.mlp.linear_fc1.layer_norm_weight`, if the layer is dense - b) `decoder.layers.*.pre_mlp_layernorm.weight`, if the layer is MoE - - We rename decoder.layers.*.mlp.linear_fc1.layer_norm_weight in the first case to unify key names - """ - for layer_i in range(source_config["num_layers"]): - if ( - f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" - in source_state_dict - ): - weight = source_state_dict.pop( - f"decoder.layers.{layer_i}.mlp.linear_fc1.layer_norm_weight" - ) - source_state_dict[f"decoder.layers.{layer_i}.pre_mlp_layernorm.weight"] = ( - weight - ) - modified_source = _ModelState(source_state_dict) - return modified_source diff --git a/nemo_rl/models/megatron/converters/llama.py b/nemo_rl/models/megatron/converters/llama.py deleted file mode 100644 index 101378f86e..0000000000 --- a/nemo_rl/models/megatron/converters/llama.py +++ /dev/null @@ -1,65 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(): - mapping = { - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - return mapping - - -def get_export_transforms(hf_config): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.weight", - target_key=( - "model.layers.*.self_attn.q_proj.weight", - "model.layers.*.self_attn.k_proj.weight", - "model.layers.*.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - - if not hf_config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - fn=TransformFns.prune_padding, - ) - ) - - return transforms diff --git a/nemo_rl/models/megatron/converters/qwen2.py 
b/nemo_rl/models/megatron/converters/qwen2.py deleted file mode 100644 index 92fbf84e88..0000000000 --- a/nemo_rl/models/megatron/converters/qwen2.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(source): - mapping = { - "decoder.layers.*.self_attention.linear_proj.weight": "model.layers.*.self_attn.o_proj.weight", - "decoder.layers.*.mlp.linear_fc2.weight": "model.layers.*.mlp.down_proj.weight", - "decoder.layers.*.self_attention.linear_qkv.layer_norm_weight": "model.layers.*.input_layernorm.weight", - "decoder.layers.*.mlp.linear_fc1.layer_norm_weight": "model.layers.*.post_attention_layernorm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - return mapping - - -def get_export_transforms(hf_config): - transforms = [ - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.weight", - target_key=( - "model.layers.*.self_attn.q_proj.weight", - "model.layers.*.self_attn.k_proj.weight", - "model.layers.*.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - io.state_transform( - source_key="decoder.layers.*.self_attention.linear_qkv.bias", - target_key=( - "model.layers.*.self_attn.q_proj.bias", - "model.layers.*.self_attn.k_proj.bias", - "model.layers.*.self_attn.v_proj.bias", - ), - 
fn=TransformFns.split_qkv_bias, - ), - io.state_transform( - source_key="decoder.layers.*.mlp.linear_fc1.weight", - target_key=( - "model.layers.*.mlp.gate_proj.weight", - "model.layers.*.mlp.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - - if not hf_config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - fn=TransformFns.prune_padding, - ), - ) - - return transforms diff --git a/nemo_rl/models/megatron/converters/qwen3.py b/nemo_rl/models/megatron/converters/qwen3.py deleted file mode 100644 index 1dcb278106..0000000000 --- a/nemo_rl/models/megatron/converters/qwen3.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from nemo.lightning import io -from nemo.lightning.io.state import TransformFns - - -def get_export_mapping(config): - mapping = { - "**.self_attention.linear_proj.weight": "**.self_attn.o_proj.weight", - "**.self_attention.linear_qkv.layer_norm_weight": "**.input_layernorm.weight", - "**.self_attention.q_layernorm.weight": "**.self_attn.q_norm.weight", - "**.self_attention.k_layernorm.weight": "**.self_attn.k_norm.weight", - "decoder.final_layernorm.weight": "model.norm.weight", - } - is_moe = getattr(config, "num_experts", 0) > 0 - if is_moe: - mapping.update( - { - "**.mlp.experts.linear_fc2.weight*": "**.mlp.experts.*.down_proj.weight", - "**.mlp.router.weight": "**.mlp.gate.weight", - "**.pre_mlp_layernorm.weight": "**.post_attention_layernorm.weight", - } - ) - else: - mapping.update( - { - "**.mlp.linear_fc2.weight": "**.mlp.down_proj.weight", - "**.mlp.linear_fc1.layer_norm_weight": "**.post_attention_layernorm.weight", - } - ) - return mapping - - -def get_export_transforms(config): - is_moe = getattr(config, "num_experts", 0) > 0 - transforms = [ - io.state_transform( - source_key="**.self_attention.linear_qkv.weight", - target_key=( - "**.self_attn.q_proj.weight", - "**.self_attn.k_proj.weight", - "**.self_attn.v_proj.weight", - ), - fn=TransformFns.split_qkv, - ), - ( - io.state_transform( - source_key="**.mlp.linear_fc1.weight", - target_key=("**.mlp.gate_proj.weight", "**.mlp.up_proj.weight"), - fn=TransformFns.split_fc1, - ) - if not is_moe - else io.state_transform( - source_key="**.mlp.experts.linear_fc1.weight*", - target_key=( - "**.mlp.experts.*.gate_proj.weight", - "**.mlp.experts.*.up_proj.weight", - ), - fn=TransformFns.split_fc1, - ) - ), - io.state_transform( - source_key="embedding.word_embeddings.weight", - target_key="model.embed_tokens.weight", - fn=TransformFns.prune_padding, - ), - ] - if not config.tie_word_embeddings: - transforms.append( - io.state_transform( - source_key="output_layer.weight", - target_key="lm_head.weight", - 
fn=TransformFns.prune_padding, - ) - ) - - return transforms diff --git a/nemo_rl/models/megatron/refit_utils.py b/nemo_rl/models/megatron/refit_utils.py deleted file mode 100644 index af6ffa2634..0000000000 --- a/nemo_rl/models/megatron/refit_utils.py +++ /dev/null @@ -1,331 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import re -import time -from typing import Any, List, Tuple - -import torch -from megatron.core import parallel_state -from megatron.core.extensions.transformer_engine import ( - TEColumnParallelGroupedLinear, - TEColumnParallelLinear, - TERowParallelGroupedLinear, - TERowParallelLinear, -) -from megatron.core.tensor_parallel.layers import ( - ColumnParallelLinear, - RowParallelLinear, - VocabParallelEmbedding, -) -from torch.distributed import get_process_group_ranks - -from nemo_rl.models.megatron.converters.common import get_global_key_from_local_key - - -def get_tp_dim(model, param_name, named_modules_dict): - # pass in named_modules_dict so we can get it ahead of time instead - # of once for each param - pattern = re.compile(r"\.(?:weight|bias)\d*$") - if not pattern.search(param_name): - return None - - prefix = "" - if hasattr(model, "module"): - prefix = "module." - if hasattr(model.module, "module"): - prefix = "module.module." 
- key = prefix + ".".join(param_name.split(".")[:-1]) - module = named_modules_dict.get(key) - if module is None: - return None - if hasattr(module, "parallel_mode") and module.parallel_mode is not None: - # TE layers sometimes have parallel_mode we can check directly - if module.parallel_mode == "column": - return 0 - elif module.parallel_mode == "row": - return 1 - else: - return None - elif isinstance( - module, - ( - VocabParallelEmbedding, - ColumnParallelLinear, - TEColumnParallelGroupedLinear, - TEColumnParallelLinear, - ), - ): - return 0 - elif isinstance( - module, (RowParallelLinear, TERowParallelGroupedLinear, TERowParallelLinear) - ): - return 1 - else: - return None - - -@torch.no_grad() -def gather_params(model, keys: list[str], key_to_global_keys: dict[str, list[str]]): - st = time.perf_counter() - - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - etp_group = parallel_state.get_expert_tensor_parallel_group() - etp_world_size = torch.distributed.get_world_size(etp_group) - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - named_modules_dict = dict(model.named_modules()) - state_dict = model.state_dict() - gathered_params = {} - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - - for local_key, owner_pp_local_rank_id, shape, dtype in sorted(keys): - if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: - param = state_dict[local_key] - - tp_dim = get_tp_dim(model, local_key, named_modules_dict) - - # If the parameter is TP-sharded, gather its slices on GPU. 
- if tp_dim is not None: - if ep_pattern.search(local_key): - world_size = etp_world_size - group = etp_group - else: - world_size = tp_world_size - group = tp_group - - gathered_slices = [torch.empty_like(param) for _ in range(world_size)] - torch.distributed.all_gather(gathered_slices, param, group=group) - full_param = torch.cat(gathered_slices, dim=tp_dim) - else: - full_param = param - else: - full_param = torch.empty( - *shape, dtype=dtype, device=torch.cuda.current_device() - ) - - # Broadcast across PP group. - src_global_rank = pp_global_ranks[owner_pp_local_rank_id] - - # Broadcast from the rank that has the parameter - torch.distributed.broadcast(full_param, src=src_global_rank, group=pp_group) - pp_gathered_params = [full_param] - - # gather across EP group - if ep_pattern.search(local_key): - stacked_pp_gathered_params = torch.stack(pp_gathered_params) - - ep_gathered_params = [ - torch.empty( - stacked_pp_gathered_params.shape, - dtype=dtype, - device=torch.cuda.current_device(), - ) - for _ in range(ep_world_size) - ] - torch.distributed.all_gather( - ep_gathered_params, stacked_pp_gathered_params, group=ep_group - ) - flat_gathered_params = [ - x for y in ep_gathered_params for x in torch.unbind(y) - ] - - else: - flat_gathered_params = pp_gathered_params - - flat_gathered_global_keys = key_to_global_keys[ - (local_key, owner_pp_local_rank_id) - ] - for k, p in zip(flat_gathered_global_keys, flat_gathered_params): - if k is not None: - gathered_params[k] = p - - return gathered_params - - -@torch.no_grad() -def get_param_info(model, dtype): - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - tp_group_rank_ids = get_process_group_ranks(tp_group) - - etp_group = parallel_state.get_expert_tensor_parallel_group() - etp_world_size = torch.distributed.get_world_size(etp_group) - etp_group_rank_ids = get_process_group_ranks(etp_group) - - pp_group = 
parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_group_rank_ids = get_process_group_ranks(pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - ep_group_rank_ids = get_process_group_ranks(ep_group) - - # Collect parameter info - param_info = [] - - # Dictionary of modules we can quickly look up to check if a module has TP - named_modules_dict = dict(model.named_modules()) - - # Process each parameter in the model - # state_dict includes parameters and persistent buffers - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - for name, param in model.state_dict().items(): - # Skip _extra_state entries (these are metadata, not actual weights) - if "_extra_state" in name: - continue - - use_etp = True if ep_pattern.search(name) else False - if use_etp: - tensor_mp_rank_ids = etp_group_rank_ids - else: - tensor_mp_rank_ids = tp_group_rank_ids - - shape = list(param.shape) - tp_dim = get_tp_dim(model, name, named_modules_dict) - if tp_dim is not None: - tp_rank_ids = tuple(sorted(tensor_mp_rank_ids)) - shape[tp_dim] *= len(tp_rank_ids) - else: - tp_rank_ids = (torch.distributed.get_rank(),) - - pp_rank_ids = tuple(sorted(pp_group_rank_ids)) - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - - if ep_pattern.search(name): - ep_rank_ids = tuple(sorted(ep_group_rank_ids)) - else: - ep_rank_ids = (torch.distributed.get_rank(),) - - # Calculate size for this parameter - prec_to_bytes = { - torch.bfloat16: 2, - torch.float16: 2, - torch.float32: 4, - } - scale = prec_to_bytes[dtype] / prec_to_bytes[param.dtype] - size_in_bytes = ( - param.element_size() - * param.numel() - * len(tensor_mp_rank_ids) - * len(ep_rank_ids) - * scale - ) - param_info.append( - ( - ( - name, - pp_local_rank_id, - tuple(shape), - param.dtype, - ), - size_in_bytes, - ) - ) - # 
Gather parameter info from all pipeline parallel ranks to ensure complete coverage - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - - # Gather all parameter info from all PP ranks - pp_gathered_param_infos = [None] * pp_world_size - torch.distributed.all_gather_object( - pp_gathered_param_infos, param_info, group=pp_group - ) - pp_gathered_param_infos = [x for y in pp_gathered_param_infos for x in y] # type: ignore - - # Gather parameter info from all expert parallel ranks to ensure complete coverage - ep_group = parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # Gather all parameter info from all EP ranks - ep_gathered_param_infos = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_param_infos, pp_gathered_param_infos, group=ep_group - ) - all_param_infos = [x for y in ep_gathered_param_infos for x in y] - - # Merge all parameter infos, keeping only unique parameter names - merged_param_info = [] - seen_params = set() - - for name, size in all_param_infos: - if name not in seen_params: - merged_param_info.append((name, size)) - seen_params.add(name) - - # Update param_info with the merged information - param_info = merged_param_info - print(f"Prepared {len(param_info)} tensors for refit") - - return param_info - - -@torch.no_grad() -def get_local_key_to_global_keys(model, state_dict_info: List[Tuple[Any, int]]): - """Get the local key to global keys mapping.""" - # Get parallel info - tp_group = parallel_state.get_tensor_model_parallel_group() - tp_world_size = torch.distributed.get_world_size(tp_group) - - pp_group = parallel_state.get_pipeline_model_parallel_group() - pp_world_size = torch.distributed.get_world_size(pp_group) - pp_global_ranks = torch.distributed.get_process_group_ranks(group=pp_group) - pp_local_rank_id = parallel_state.get_pipeline_model_parallel_rank() - - ep_group = 
parallel_state.get_expert_model_parallel_group() - ep_world_size = torch.distributed.get_world_size(ep_group) - - # start calculating the global key - ep_pattern = re.compile(r"mlp\.experts.*\.weight\d*$") - state_dict = model.state_dict() - final_key_to_global_keys = {} - - for param_info, size in state_dict_info: - local_key, owner_pp_local_rank_id, _, _ = param_info - - # Step 1: create global key from local key - # if: for if a parameter is sharded along PP or EP; - # else: not sharded (like embedding) - pp_gathered_objs = [None] - if local_key in state_dict and owner_pp_local_rank_id == pp_local_rank_id: - pp_gathered_objs[0] = get_global_key_from_local_key(local_key, model.config) - - # Step 2: gather global keys from ranks in PP group - src_global_rank = pp_global_ranks[owner_pp_local_rank_id] - torch.distributed.broadcast_object_list( - pp_gathered_objs, src=src_global_rank, group=pp_group - ) - - # Step 3: gather global keys from ranks in EP group - if ep_pattern.search(local_key): - ep_gathered_objs = [None] * ep_world_size - torch.distributed.all_gather_object( - ep_gathered_objs, pp_gathered_objs, group=ep_group - ) - flat_gathered_objs = [x for y in ep_gathered_objs for x in y] - else: - flat_gathered_objs = pp_gathered_objs - - final_key_to_global_keys[(local_key, owner_pp_local_rank_id)] = ( - flat_gathered_objs - ) - - return final_key_to_global_keys diff --git a/nemo_rl/models/policy/megatron_policy_worker.py b/nemo_rl/models/policy/megatron_policy_worker.py index 70a80b1b8f..9f3be4526d 100644 --- a/nemo_rl/models/policy/megatron_policy_worker.py +++ b/nemo_rl/models/policy/megatron_policy_worker.py @@ -22,6 +22,42 @@ import ray import torch +from megatron.bridge import AutoBridge +from megatron.bridge.models.model_provider import get_model +from megatron.bridge.training import fault_tolerance +from megatron.bridge.training.checkpointing import ( + checkpoint_exists, + init_checkpointing_context, + load_checkpoint, + maybe_finalize_async_save, + 
save_checkpoint, +) +from megatron.bridge.training.config import ( + CheckpointConfig, + ConfigContainer, + DistributedDataParallelConfig, + LoggerConfig, + OptimizerConfig, + SchedulerConfig, + TokenizerConfig, + TrainingConfig, +) +from megatron.bridge.training.initialize import ( + initialize_megatron, + set_jit_fusion_options, +) +from megatron.bridge.training.optim import setup_optimizer +from megatron.bridge.training.setup import ( + HAVE_FSDP2, + _update_model_config_funcs, +) +from megatron.bridge.training.state import GlobalState +from megatron.bridge.training.tokenizers.tokenizer import build_tokenizer +from megatron.bridge.training.utils.train_utils import ( + logical_and_across_model_parallel_group, + reduce_max_stat_across_model_parallel_group, +) +from megatron.bridge.utils.common_utils import get_rank_safe from megatron.core import parallel_state from megatron.core.distributed import DistributedDataParallel from megatron.core.distributed.custom_fsdp import ( @@ -56,34 +92,6 @@ run_mcore_engine, ) from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron import fault_tolerance -from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint, save_checkpoint -from nemo.tron.config import ( - CheckpointConfig, - ConfigContainer, - DistributedDataParallelConfig, - LoggerConfig, - OptimizerConfig, - SchedulerConfig, - TokenizerConfig, - TrainingConfig, -) -from nemo.tron.init import initialize_megatron, set_jit_fusion_options -from nemo.tron.model import get_model_from_config -from nemo.tron.optim import setup_optimizer -from nemo.tron.setup import ( - HAVE_FSDP2, - _init_checkpointing_context, - _update_model_config_funcs, -) -from nemo.tron.state import GlobalState -from nemo.tron.tokenizers.tokenizer import build_tokenizer -from nemo.tron.utils.async_utils import maybe_finalize_async_save -from nemo.tron.utils.common_utils import get_rank_safe -from nemo.tron.utils.train_utils import ( - 
logical_and_across_model_parallel_group, - reduce_max_stat_across_model_parallel_group, -) from ray.util.queue import Queue from transformers import PreTrainedTokenizerBase @@ -105,12 +113,6 @@ forward_step_arbitrary_loss, ) from nemo_rl.models.megatron.community_import import import_model_from_hf_name -from nemo_rl.models.megatron.converters.common import MegatronToHFConverter -from nemo_rl.models.megatron.refit_utils import ( - gather_params, - get_local_key_to_global_keys, - get_param_info, -) from nemo_rl.models.policy import PolicyConfig from nemo_rl.models.policy.interfaces import ( LogprobOutputSpec, @@ -128,6 +130,59 @@ TokenizerType = TypeVar("TokenizerType", bound=PreTrainedTokenizerBase) +def broadcast_object_across_pp_ranks(obj): + """Broadcast an object across pipeline parallel ranks. + + This utility function handles broadcasting an object from the rank that owns it + to all other pipeline parallel ranks. If only one rank has the object (non-None), + it will be broadcast to all other ranks. + + Args: + obj: The object to broadcast. Can be None on ranks that don't own it. + + Returns: + The object on all ranks (either the original or the broadcast copy). + + Raises: + ValueError: If the object doesn't exist on any pipeline parallel rank. + """ + pp_size = get_pipeline_model_parallel_world_size() + pp_group = get_pipeline_model_parallel_group() + + if pp_size == 1: + return obj + + # ------------------------------------------------------------------ + # 1. Gather presence flags from all PP ranks to find the source rank + # ------------------------------------------------------------------ + has_obj = obj is not None + obj_flags = [None] * pp_size + torch.distributed.all_gather_object(obj_flags, has_obj, group=pp_group) + + # ------------------------------------------------------------------ + # 2. 
Identify the owning rank (the only rank with True flag) + # ------------------------------------------------------------------ + src_rank = None # Rank *inside* the PP group + for rank, flag in enumerate(obj_flags): + if flag: + src_rank = rank + break + + if src_rank is None: + raise ValueError("Object must exist on at least one PP rank") + + # ------------------------------------------------------------------ + # 3. Broadcast the object from the source rank to all ranks + # ------------------------------------------------------------------ + # Use broadcast_object_list which is more robust than all_gather_object + obj_list = [obj] + pp_ranks = torch.distributed.get_process_group_ranks(pp_group) + global_src = pp_ranks[src_rank] + torch.distributed.broadcast_object_list(obj_list, src=global_src, group=pp_group) + + return obj_list[0] + + def setup_megatron_model( policy_cfg: PolicyConfig, cfg: ConfigContainer, @@ -139,19 +194,19 @@ def setup_megatron_model( state.cfg = cfg # TODO: Freeze state.cfg + cfg.dist.external_gpu_device_mapping = True initialize_megatron( cfg=cfg, get_embedding_ranks=get_embedding_ranks, get_position_embedding_ranks=get_position_embedding_ranks, - gpu_visibility_externally_set=True, ) - if cfg.ft_config and cfg.ft_config.enable_ft_package: + if cfg.ft and cfg.ft.enable_ft_package: fault_tolerance.setup(cfg, state) - fault_tolerance.maybe_setup_simulated_fault(cfg.ft_config) + fault_tolerance.maybe_setup_simulated_fault(cfg.ft) # Set pytorch JIT layer fusion options and warmup JIT functions. - set_jit_fusion_options(cfg.model_config, cfg.train_config.micro_batch_size) + set_jit_fusion_options(cfg.model, cfg.train.micro_batch_size) # Adjust the startup time so it reflects the largest value. # This will be closer to what scheduler will see (outside of @@ -170,62 +225,67 @@ def setup_megatron_model( torch.distributed.barrier() # Context used for persisting some state between checkpoint saves. 
- checkpointing_context = _init_checkpointing_context(cfg.checkpoint_config) + checkpointing_context = init_checkpointing_context(cfg.checkpoint) # Tokenizer build_tokenizer( - cfg.tokenizer_config, - make_vocab_size_divisible_by=cfg.model_config.make_vocab_size_divisible_by - // cfg.model_config.tensor_model_parallel_size, - tensor_model_parallel_size=cfg.model_config.tensor_model_parallel_size, + cfg.tokenizer, + make_vocab_size_divisible_by=cfg.model.make_vocab_size_divisible_by + // cfg.model.tensor_model_parallel_size, + tensor_model_parallel_size=cfg.model.tensor_model_parallel_size, ) - if not cfg.model_config.vocab_size: - cfg.model_config.vocab_size = cfg.tokenizer_config.padded_vocab_size + if not cfg.model.vocab_size: + cfg.model.vocab_size = cfg.tokenizer.padded_vocab_size torch.distributed.barrier() - model_post_init_fns = [] + pre_wrap_hook = [] if policy_cfg["megatron_cfg"]["freeze_moe_router"]: - def freeze_moe_router(model_module): - # Handle both wrapped (Float16Module) and unwrapped models - if isinstance(model_module, Float16Module): - model_module = model_module.module - for layer in model_module.decoder.layers: - if hasattr(layer.mlp, "router"): - layer.mlp.router.weight.requires_grad = False + def freeze_moe_router(megatron_model): + if not isinstance(megatron_model, list): + megatron_model = [megatron_model] + for model_module in megatron_model: + # Handle both wrapped (Float16Module) and unwrapped models + if isinstance(model_module, Float16Module): + model_module = model_module.module + for layer in model_module.decoder.layers: + if hasattr(layer.mlp, "router"): + layer.mlp.router.weight.requires_grad = False # Re-enable float32 expert bias for moe router to avoid parameter dtype inconsistency # see https://github.com/NVIDIA/Megatron-LM/blob/e6c510ff3c1159f8955589b26f7c395bdf0607d9/megatron/core/transformer/moe/router.py#L149 - def re_enable_float32_expert_bias(model_module): - # Handle both wrapped (Float16Module) and unwrapped models - 
if isinstance(model_module, Float16Module): - model_module = model_module.module - for layer in model_module.decoder.layers: - if hasattr(layer.mlp, "router"): - layer.mlp.router._maintain_float32_expert_bias() - - model_post_init_fns.append(freeze_moe_router) - model_post_init_fns.append(re_enable_float32_expert_bias) + def re_enable_float32_expert_bias(megatron_model): + if not isinstance(megatron_model, list): + megatron_model = [megatron_model] + for model_module in megatron_model: + # Handle both wrapped (Float16Module) and unwrapped models + if isinstance(model_module, Float16Module): + model_module = model_module.module + for layer in model_module.decoder.layers: + if hasattr(layer.mlp, "router"): + layer.mlp.router._maintain_float32_expert_bias() + + pre_wrap_hook.extend([freeze_moe_router, re_enable_float32_expert_bias]) # Model, optimizer, and learning rate. - model = get_model_from_config( - cfg.model_config, - cfg.ddp_config, - use_torch_fsdp2=cfg.dist_config.use_torch_fsdp2, - overlap_param_gather_with_optimizer_step=cfg.optimizer_config.overlap_param_gather_with_optimizer_step, - data_parallel_random_init=cfg.rng_config.data_parallel_random_init, - model_post_init_fns=model_post_init_fns, + model = get_model( + cfg.model, + cfg.ddp, + use_torch_fsdp2=cfg.dist.use_torch_fsdp2, + overlap_param_gather_with_optimizer_step=cfg.optimizer.overlap_param_gather_with_optimizer_step, + data_parallel_random_init=cfg.rng.data_parallel_random_init, + pre_wrap_hook=pre_wrap_hook, wrap_cast_model_output_to_fp32=( not policy_cfg["megatron_cfg"].get("defer_fp32_logits", None) ), ) if load_optimizer: optimizer, scheduler = setup_optimizer( - optimizer_config=cfg.optimizer_config, - scheduler_config=cfg.scheduler_config, + optimizer_config=cfg.optimizer, + scheduler_config=cfg.scheduler, model=model, - use_gloo_process_groups=cfg.dist_config.use_gloo_process_groups, + use_gloo_process_groups=cfg.dist.use_gloo_process_groups, ) else: optimizer = None @@ -236,11 +296,11 @@ 
def re_enable_float32_expert_bias(model_module): # Load checkpoint if applicable if ( - cfg.checkpoint_config.load is not None - or cfg.checkpoint_config.pretrained_checkpoint is not None + cfg.checkpoint.load is not None + or cfg.checkpoint.pretrained_checkpoint is not None ) and ( - checkpoint_exists(cfg.checkpoint_config.load) - or checkpoint_exists(cfg.checkpoint_config.pretrained_checkpoint) + checkpoint_exists(cfg.checkpoint.load) + or checkpoint_exists(cfg.checkpoint.pretrained_checkpoint) ): load_checkpoint( state, @@ -248,7 +308,7 @@ def re_enable_float32_expert_bias(model_module): optimizer, scheduler, checkpointing_context=checkpointing_context, - skip_load_to_model_and_opt=HAVE_FSDP2 and cfg.dist_config.use_torch_fsdp2, + skip_load_to_model_and_opt=HAVE_FSDP2 and cfg.dist.use_torch_fsdp2, ) print("Checkpoint loaded") torch.distributed.barrier() @@ -279,7 +339,9 @@ def destroy_parallel_state(): # Reset async calls queue to prevent call_idx mismatches after distributed context recreation try: import nemo.tron.utils.async_utils as nemo_async_utils - from nemo.tron.utils.async_utils import AsyncCallsQueue + from megatron.core.dist_checkpointing.strategies.async_utils import ( + AsyncCallsQueue, + ) # Clean up any existing async callers first old_call_idx = getattr(nemo_async_utils._async_calls_queue, "call_idx", None) @@ -425,44 +487,16 @@ def __init__( # Ensure clean slate before import destroy_parallel_state() + # Set for rank for non-collocated to check which ranks to broadcast from self.rank = get_rank_safe() - if self.rank == 0: - if pt_checkpoint_exists: - print( - f"Checkpoint already exists at {pretrained_path}. Skipping import." 
- ) - else: - try: - # Clean environment to prevent conflicts - env_backup = {} - env_vars_to_clean = [ - "MASTER_ADDR", - "MASTER_PORT", - "WORLD_SIZE", - "LOCAL_RANK", - ] - for var in env_vars_to_clean: - if var in os.environ: - env_backup[var] = os.environ[var] - del os.environ[var] - - import_model_from_hf_name(hf_model_name, pretrained_path) - - # Restore environment - for var, val in env_backup.items(): - os.environ[var] = val - - except Exception as e: - print(f"Error importing model: {e}") - raise - finally: - # Force cleanup after import - destroy_parallel_state() - pre_init_communication_queue.put(True) + # Need to initialize the process group before calling into Megatron-Bridge, otherwise Megatron-Bridge will try to set an incorrect device + torch.distributed.init_process_group("nccl") + if pt_checkpoint_exists: + print(f"Checkpoint already exists at {pretrained_path}. Skipping import.") else: - pre_init_communication_queue.get() - pre_init_communication_queue.put(True) - destroy_parallel_state() + import_model_from_hf_name( + hf_model_name, pretrained_path, self.cfg["megatron_cfg"] + ) pretrained_run_config = os.path.join( pretrained_path, "iter_0000000/run_config.yaml" @@ -477,9 +511,11 @@ def __init__( f"Pretrained run config not found at {pretrained_run_config} on rank={get_rank_safe()}. This usually means that the one-time HF->mcore conversion on rank=0 saved to a directory not being mounted on this node. 
Please check " ) - cfg_from_pretrained = ConfigContainer.from_yaml(pretrained_run_config) - model_cfg = cfg_from_pretrained.model_config - cfg_from_pretrained.logger_config = LoggerConfig() + cfg_from_pretrained = ConfigContainer.from_yaml( + pretrained_run_config, mode=0 + ) # strict loading + model_cfg = cfg_from_pretrained.model + cfg_from_pretrained.logger = LoggerConfig() model_cfg.tensor_model_parallel_size = self.cfg["megatron_cfg"][ "tensor_model_parallel_size" @@ -569,18 +605,18 @@ def __init__( load_rng=False, ) self.megatron_cfg = ConfigContainer( - model_config=model_cfg, - checkpoint_config=checkpoint_config, - logger_config=LoggerConfig(logging_level=0), - train_config=TrainingConfig( + model=model_cfg, + checkpoint=checkpoint_config, + logger=LoggerConfig(logging_level=0), + train=TrainingConfig( micro_batch_size=1, # ignored global_batch_size=self.cfg["train_global_batch_size"], # ignored train_iters=1000, # Default value for inference ), - optimizer_config=OptimizerConfig( + optimizer=OptimizerConfig( **self.cfg["megatron_cfg"]["optimizer"], ), - ddp_config=DistributedDataParallelConfig( + ddp=DistributedDataParallelConfig( check_for_nan_in_grad=True, grad_reduce_in_fp32=self.cfg["megatron_cfg"][ "distributed_data_parallel_config" @@ -601,11 +637,11 @@ def __init__( "distributed_data_parallel_config" ]["data_parallel_sharding_strategy"], ), - scheduler_config=SchedulerConfig( + scheduler=SchedulerConfig( **self.cfg["megatron_cfg"]["scheduler"], ), - dataset_config=None, - tokenizer_config=TokenizerConfig( + dataset=None, + tokenizer=TokenizerConfig( tokenizer_type="HuggingFaceTokenizer", tokenizer_model=hf_model_name, ), @@ -623,8 +659,8 @@ def __init__( # Set the param sync function for the model if ( - self.megatron_cfg.ddp_config.overlap_param_gather - and self.megatron_cfg.ddp_config.align_param_gather + self.megatron_cfg.ddp.overlap_param_gather + and self.megatron_cfg.ddp.align_param_gather ): self.megatron_cfg.param_sync_func = [ 
model_chunk.start_param_sync for model_chunk in self.model @@ -636,31 +672,31 @@ def __init__( if init_reference_model: self.model = self.move_model(self.model, "cpu") - ref_ckpt_context = _init_checkpointing_context(ref_checkpoint_config) + ref_ckpt_context = init_checkpointing_context(ref_checkpoint_config) # Create a separate megatron config for the reference model with the correct checkpoint config ref_megatron_cfg = ConfigContainer( - model_config=self.megatron_cfg.model_config, - checkpoint_config=ref_checkpoint_config, # Use the reference checkpoint config - logger_config=self.megatron_cfg.logger_config, - train_config=self.megatron_cfg.train_config, - optimizer_config=self.megatron_cfg.optimizer_config, - ddp_config=self.megatron_cfg.ddp_config, - scheduler_config=self.megatron_cfg.scheduler_config, - dataset_config=self.megatron_cfg.dataset_config, - tokenizer_config=self.megatron_cfg.tokenizer_config, + model=self.megatron_cfg.model, + checkpoint=ref_checkpoint_config, # Use the reference checkpoint config + logger=self.megatron_cfg.logger, + train=self.megatron_cfg.train, + optimizer=self.megatron_cfg.optimizer, + ddp=self.megatron_cfg.ddp, + scheduler=self.megatron_cfg.scheduler, + dataset=self.megatron_cfg.dataset, + tokenizer=self.megatron_cfg.tokenizer, ) # Create a separate state object for the reference model ref_state = GlobalState() ref_state.cfg = ref_megatron_cfg - reference_model = get_model_from_config( - self.megatron_cfg.model_config, - self.megatron_cfg.ddp_config, - use_torch_fsdp2=self.megatron_cfg.dist_config.use_torch_fsdp2, - overlap_param_gather_with_optimizer_step=self.megatron_cfg.optimizer_config.overlap_param_gather_with_optimizer_step, - data_parallel_random_init=self.megatron_cfg.rng_config.data_parallel_random_init, + reference_model = get_model( + self.megatron_cfg.model, + self.megatron_cfg.ddp, + use_torch_fsdp2=self.megatron_cfg.dist.use_torch_fsdp2, + 
overlap_param_gather_with_optimizer_step=self.megatron_cfg.optimizer.overlap_param_gather_with_optimizer_step, + data_parallel_random_init=self.megatron_cfg.rng.data_parallel_random_init, wrap_cast_model_output_to_fp32=( not self.cfg["megatron_cfg"].get("defer_fp32_logits", None) ), @@ -677,7 +713,7 @@ def __init__( None, # no scheduler checkpointing_context=ref_ckpt_context, skip_load_to_model_and_opt=HAVE_FSDP2 - and self.megatron_cfg.dist_config.use_torch_fsdp2, + and self.megatron_cfg.dist.use_torch_fsdp2, ) reference_model = reference_model[0] reference_model.eval() @@ -699,13 +735,13 @@ def __init__( _update_model_config_funcs( [self.model], - self.megatron_cfg.model_config, - self.megatron_cfg.ddp_config, + self.megatron_cfg.model, + self.megatron_cfg.ddp, self.optimizer, - align_grad_reduce=self.megatron_cfg.dist_config.align_grad_reduce, + align_grad_reduce=self.megatron_cfg.dist.align_grad_reduce, ) - from nemo.tron.tokenizers.tokenizer import build_tokenizer + from megatron.bridge.training.tokenizers.tokenizer import build_tokenizer tokenizer_config = TokenizerConfig( tokenizer_type="HuggingFaceTokenizer", @@ -714,7 +750,7 @@ def __init__( self.megatron_tokenizer = build_tokenizer( tokenizer_config, - make_vocab_size_divisible_by=self.megatron_cfg.model_config.make_vocab_size_divisible_by + make_vocab_size_divisible_by=self.megatron_cfg.model.make_vocab_size_divisible_by // self.cfg["megatron_cfg"]["tensor_model_parallel_size"], tensor_model_parallel_size=self.cfg["megatron_cfg"][ "tensor_model_parallel_size" @@ -722,7 +758,9 @@ def __init__( ) self.final_padded_vocab_size = tokenizer_config.padded_vocab_size self.dp_size = worker_sharding_annotations.get_axis_size("data_parallel") - self.megatron_to_hf_converter = MegatronToHFConverter(hf_model_name, self.model) + self.megatron_bridge = AutoBridge.from_hf_pretrained( + hf_model_name, trust_remote_code=True + ) self.should_disable_forward_pre_hook = (
self.cfg["megatron_cfg"]["optimizer"]["use_distributed_optimizer"] @@ -733,8 +771,16 @@ def __init__( # vars used for refit ## will be initialized in prepare_refit_info + # refit_param_info_mcore combines the conversion tasks with the param memory + # [(mcore_param_name, estimated_memory), ...] + # Note: here param name is local param name, with local layer number and + # local expert id etc. + self.refit_conversion_tasks = ( + None # Meta data for conversion params from megatron bridge + ) + self.refit_conversion_tasks_current_index = None self.refit_param_info_mcore = None - self.local_key_to_global_keys = None + ## used for streaming update inference engine weights self._held_gather_buffer = None @@ -1310,7 +1356,7 @@ def generate( f"Input to Megatron Generation worker is not properly right-padded: {error_msg}" ) - model_cfg = self.megatron_cfg.model_config + model_cfg = self.megatron_cfg.model inference_wrapper_config = InferenceWrapperConfig( hidden_size=model_cfg.hidden_size, inference_batch_times_seqlen_threshold=1000000, @@ -1424,39 +1470,74 @@ def report_device_id(self) -> str: @torch.no_grad() @wrap_with_nvtx_name("megatron_policy_worker/prepare_refit_info") def prepare_refit_info(self) -> None: - # Get parameter info for refit - # param_info: list of ((name, shape, dtype), size_in_bytes) tuples - self.refit_param_info_mcore = get_param_info(self.model, self.dtype) - - # Create a map that maps any local parameter name to a list of global parameter names. - # This map is repeatedly used by parameter gatherring phase during refit of every step. 
- self.local_key_to_global_keys = get_local_key_to_global_keys( - self.model, state_dict_info=self.refit_param_info_mcore - ) + # Get parameter info for refit / mcore side info + self.refit_param_info_mcore = self._calculate_refit_param_info() - # Collect tensor metadata for refit + # Collect tensor metadata for refit / hf side info refit_param_info_hf = {} - for key, _ in self.refit_param_info_mcore: - # gather megatron params - gathered_megatron_params = gather_params( - self.model, - [key], - key_to_global_keys=self.local_key_to_global_keys, - ) - # convert to hf params - gathered_hf_params = self.megatron_to_hf_converter.convert( - gathered_megatron_params, self.model.config - ) - # collect tensor metadata - for name, tensor in gathered_hf_params.items(): - if self.is_generation_colocated: - metadata = (tensor.shape, tensor.dtype, tensor.numel()) - else: - metadata = (tensor.shape, tensor.dtype) - refit_param_info_hf[name] = metadata - + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + ) + for name, tensor in hf_params_generator: + if self.is_generation_colocated: + metadata = (tensor.shape, tensor.dtype, tensor.numel()) + else: + metadata = (tensor.shape, tensor.dtype) + refit_param_info_hf[name] = metadata return refit_param_info_hf + def _calculate_refit_param_info(self) -> list[tuple[str, int]]: + """Calculate parameter information for refit. + + Each task contains: + - param_name: Local parameter name without module prefixes + - mapping: MegatronParamMapping instance for weight transformation + - pp_rank: Pipeline-parallel rank owning the parameter + - vp_stage: Virtual-pipeline stage index + - megatron_module: Reference to Megatron model/submodule + - param_weight: Target parameter tensor for converted weight + + Returns: + List of (parameter_name, size_in_bytes) tuples. 
+ """ + self.refit_conversion_tasks = self.megatron_bridge.get_conversion_tasks( + [self.model] + ) + param_info = [] + + def calculate_size_in_bytes(param, tp_size, ep_size): + if param is None: + # need to broadcast for other pp ranks + size_in_bytes = None + else: + # Calculate size for this parameter + prec_to_bytes = { + torch.bfloat16: 2, + torch.float16: 2, + torch.float32: 4, + } + scale = prec_to_bytes[self.dtype] / prec_to_bytes[param.dtype] + size_in_bytes = ( + param.element_size() * param.numel() * tp_size * ep_size * scale + ) + + # Broadcast size_in_bytes across pipeline parallel ranks + return broadcast_object_across_pp_ranks(size_in_bytes) + + for task in self.refit_conversion_tasks: + param_info.append( + ( + task.param_name, + calculate_size_in_bytes( + task.param_weight, + task.mapping.tp_size, + task.mapping.ep_size if task.mapping.is_expert else 1, + ), + ) + ) + return param_info + @wrap_with_nvtx_name("megatron_policy_worker/prepare_weights_for_ipc") def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: """Prepare Megatron model weights for IPC transfer to vLLM. 
@@ -1474,7 +1555,7 @@ def prepare_weights_for_ipc(self) -> tuple[list[tuple[str, int]], float]: ## default to 20% to get some more speedup than 10%, OOM if set to 30% memory_ratio = os.getenv("NRL_REFIT_BUFFER_MEMORY_RATIO", "0.2") total_available_bytes *= float(memory_ratio) - + self.refit_conversion_tasks_current_index = 0 return self.refit_param_info_mcore, total_available_bytes # Temporary fix, 'keys' is a kwarg due to some sort of ray bug @@ -1492,15 +1573,19 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: del self._held_gather_buffer self._held_gather_buffer = None - gathered_megatron_params = gather_params( - self.model, - keys, - key_to_global_keys=self.local_key_to_global_keys, - ) + # extract the conversion tasks in this pack + conversion_tasks = self.refit_conversion_tasks[ + self.refit_conversion_tasks_current_index : self.refit_conversion_tasks_current_index + + len(keys) + ] + self.refit_conversion_tasks_current_index += len(keys) - gathered_hf_params = self.megatron_to_hf_converter.convert( - gathered_megatron_params, self.model.config + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + conversion_tasks=conversion_tasks, ) + gathered_hf_params = {name: tensor for name, tensor in hf_params_generator} # Get device UUID for IPC handles device_uuid = self.report_device_id() @@ -1571,21 +1656,14 @@ def get_weights_ipc_handles(self, *, keys: list[str]) -> dict[str, Any]: @torch.no_grad() def broadcast_weights_for_collective(self) -> None: """Broadcast the weights for collective communication.""" - for key, _ in self.refit_param_info_mcore: - # gather megatron params - gathered_megatron_params = gather_params( - self.model, - [key], - key_to_global_keys=self.local_key_to_global_keys, - ) - # convert to hf params - gathered_hf_params = self.megatron_to_hf_converter.convert( - gathered_megatron_params, self.model.config - ) - # broadcast from train rank0 worker to inference 
workers + hf_params_generator = self.megatron_bridge.export_hf_weights( + [self.model], + show_progress=False, + ) + # broadcast from train rank0 worker to inference workers + for _, tensor in hf_params_generator: if self.rank == 0: - for _, tensor in gathered_hf_params.items(): - self.model_update_group.broadcast(tensor, src=0) + self.model_update_group.broadcast(tensor, src=0) def prepare_for_lp_inference(self): self.model = self.move_model(self.model, "cuda", move_grads=False) @@ -1747,15 +1825,17 @@ def save_checkpoint( "Megatron core state or model is not initialized. Cannot save checkpoint." ) - original_save_path = self.mcore_state.cfg.checkpoint_config.save + original_save_path = self.mcore_state.cfg.checkpoint.save # save_dir = os.path.dirname(weights_path) release_name = os.path.basename(weights_path) try: maybe_finalize_async_save( - ckpt_cfg=self.mcore_state.cfg.checkpoint_config, blocking=False + self.mcore_state, + ckpt_cfg=self.mcore_state.cfg.checkpoint, + blocking=False, ) - self.mcore_state.cfg.checkpoint_config.save = weights_path + self.mcore_state.cfg.checkpoint.save = weights_path optimizer_to_save = None scheduler_to_save = None @@ -1785,7 +1865,8 @@ def save_checkpoint( ) print(f"Saved checkpoint to {weights_path}") maybe_finalize_async_save( - ckpt_cfg=self.mcore_state.cfg.checkpoint_config, + self.mcore_state, + ckpt_cfg=self.mcore_state.cfg.checkpoint, blocking=True, terminate=True, ) @@ -1799,7 +1880,7 @@ def save_checkpoint( print(f"Failed to save checkpoint to {weights_path}: {e}") raise finally: - self.mcore_state.cfg.checkpoint_config.save = original_save_path + self.mcore_state.cfg.checkpoint.save = original_save_path def load_checkpoint(self, weights_path: str, optimizer_path: Optional[str] = None): """Load a training checkpoint. 
diff --git a/pyproject.toml b/pyproject.toml index 84df2fb543..31959e4bce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,9 +82,13 @@ mcore = [ # sudo dpkg -i cuda-keyring_1.1-1_all.deb # sudo apt-get update # sudo apt-get install cudnn-cuda-12 - "transformer-engine[pytorch]==2.3.0", + + # This dependency also needs to be compatible with the spec in Megatron-Bridge/pyproject.toml. + # It is specified here since we don't directly use Megatron-Bridge/pyproject.toml, but a proxy setup.py+pyproject.toml combo + # outside to allow "optionally" installing the megatron path. It's simpler to deal with transformer-engine here in the NeMo RL pyproject.toml + "transformer-engine[pytorch]==2.5.0", "megatron-core", - "nemo-tron", + "megatron-bridge", # Remove this once https://github.com/NVIDIA-NeMo/RL/issues/501 resolved "vllm==0.10.0", # Flash-attn version should be selected to satisfy both TE + vLLM requirements (xformers in particular) @@ -134,9 +138,8 @@ test = [ [tool.uv.sources] megatron-core = { workspace = true } -nemo-tron = { workspace = true } nemo-automodel = { workspace = true } -# The NeMo Run source to be used by nemo-tron +megatron-bridge = { workspace = true } nemo_run = { git = "https://github.com/NVIDIA-NeMo/Run", rev = "414f0077c648fde2c71bb1186e97ccbf96d6844c" } # torch/torchvision/triton all come from the torch index in order to pick up aarch64 wheels torch = [ @@ -154,8 +157,8 @@ mamba-ssm = { git = "https://github.com/state-spaces/mamba.git", rev = "2e16fc30 [tool.uv.workspace] members = [ "3rdparty/Megatron-LM-workspace", - "3rdparty/NeMo-workspace", "3rdparty/Automodel-workspace/Automodel", + "3rdparty/Megatron-Bridge-workspace", ] [[tool.uv.index]] diff --git a/pyrefly.toml b/pyrefly.toml index e9717a1ed0..1555b2346e 100644 --- a/pyrefly.toml +++ b/pyrefly.toml @@ -99,12 +99,6 @@ project-includes = [ "nemo_rl/models/huggingface/__init__.py", "nemo_rl/models/megatron/__init__.py", "nemo_rl/models/megatron/community_import.py", - 
"nemo_rl/models/megatron/converters/__init__.py", - "nemo_rl/models/megatron/converters/common.py", - "nemo_rl/models/megatron/converters/deepseek.py", - "nemo_rl/models/megatron/converters/llama.py", - "nemo_rl/models/megatron/converters/qwen2.py", - "nemo_rl/models/megatron/converters/qwen3.py", "nemo_rl/models/policy/__init__.py", "nemo_rl/models/policy/interfaces.py", "nemo_rl/models/policy/utils.py", diff --git a/tests/functional/test_mcore_extra_installed_correctly.sh b/tests/functional/test_mcore_extra_installed_correctly.sh index d150f0ed7f..535765c2fe 100755 --- a/tests/functional/test_mcore_extra_installed_correctly.sh +++ b/tests/functional/test_mcore_extra_installed_correctly.sh @@ -37,42 +37,16 @@ EOF uv run --extra mcore --no-build-isolation python <<"EOF" import is_megatron_installed -import is_nemo_installed +import is_megatron_bridge_installed assert is_megatron_installed.INSTALLED, "Megatron is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" -assert is_nemo_installed.INSTALLED, "NeMo is not installed. Please check if the submodule has been initialized. May need to run `git submodule update --init --recursive`" +assert is_megatron_bridge_installed.INSTALLED, "Megatron Bridge is not installed. Please check if the submodule has been initialized. 
May need to run `git submodule update --init --recursive`" # This must be the first import to get all of the megatron non-core packages added to the path import nemo_rl import megatron.core from megatron.training.utils import get_ltor_masks_and_position_ids -from nemo.tron.init import initialize_megatron -from nemo.tron.config import ( - ConfigContainer, - TrainingConfig, - LoggerConfig, - OptimizerConfig, - SchedulerConfig, - CheckpointConfig, - DistributedDataParallelConfig, -) -from nemo.tron.utils.common_utils import get_rank_safe -from nemo.tron.config import TokenizerConfig -from nemo.tron.model import get_model_from_config -from nemo.tron.checkpointing import checkpoint_exists, load_checkpoint -from nemo.tron.init import initialize_megatron, set_jit_fusion_options -from nemo.tron.setup import _init_checkpointing_context, _update_model_config_funcs -from nemo.tron.state import GlobalState -from nemo.tron.optim import setup_optimizer -from nemo.tron import fault_tolerance -from nemo.tron.tokenizers.tokenizer import build_tokenizer -from nemo.tron.utils.train_utils import ( - calc_params_l2_norm, - logical_and_across_model_parallel_group, - reduce_max_stat_across_model_parallel_group, -) -from nemo.tron.train import train_step -from nemo.tron.setup import HAVE_FSDP2 -print("[Nemo/Mcore imports successful]") +from megatron.bridge import AutoBridge +print("[Megatron-Core/Megatron-Bridge imports successful]") EOF # Sync just to return the environment to the original base state diff --git a/tests/unit/distributed/test_virtual_cluster.py b/tests/unit/distributed/test_virtual_cluster.py index 405082b6b9..2f16575b2a 100644 --- a/tests/unit/distributed/test_virtual_cluster.py +++ b/tests/unit/distributed/test_virtual_cluster.py @@ -214,7 +214,7 @@ def test_mcore_py_executable(): venv_python, "-c", # Importing nemo_rl must be first to ensure all of megatron is importable - "import nemo_rl; print('nemo_rl is imported'); import transformer_engine.pytorch as te; print('te 
is imported'); import nemo.tron; print('nemo-tron is imported'); import megatron.core; print('megatron-core is imported'); import megatron.training; print('megatron-training is imported');", + "import nemo_rl; print('nemo_rl is imported'); import transformer_engine.pytorch as te; print('te is imported'); import megatron.bridge; print('megatron-bridge is imported'); import megatron.core; print('megatron-core is imported'); import megatron.training; print('megatron-training is imported');", ], capture_output=True, text=True, @@ -226,6 +226,6 @@ def test_mcore_py_executable(): ) assert "nemo_rl is imported" in result.stdout assert "te is imported" in result.stdout - assert "nemo-tron is imported" in result.stdout + assert "megatron-bridge is imported" in result.stdout assert "megatron-core is imported" in result.stdout assert "megatron-training is imported" in result.stdout diff --git a/tests/unit/models/megatron/converters/test_converters_common.py b/tests/unit/models/megatron/converters/test_converters_common.py deleted file mode 100755 index c8731eb573..0000000000 --- a/tests/unit/models/megatron/converters/test_converters_common.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from unittest.mock import Mock, patch - -import pytest -import torch - -try: - from nemo_rl.models.megatron.converters.common import ( - get_global_expert_num, - get_global_key_from_local_key, - get_global_layer_num, - get_local_expert_num, - get_local_layer_num, - split_fc1_etp, - split_fc1_tp, - split_qkv_bias_gpu, - split_qkv_gpu, - update_transforms_for_nemorl, - ) -except ImportError: - pass - -# Apply mcore marker to all tests in this module -pytestmark = pytest.mark.mcore - - -class TestLayerNumberFunctions: - """Test functions related to layer number extraction and conversion.""" - - def test_get_local_layer_num_valid(self): - """Test get_local_layer_num with valid layer keys.""" - assert get_local_layer_num("layers.5.attention.weight") == 5 - assert get_local_layer_num("decoder.layers.10.mlp.weight") == 10 - assert get_local_layer_num("model.layers.0.self_attn.weight") == 0 - - def test_get_local_layer_num_invalid(self): - """Test get_local_layer_num with invalid layer keys.""" - assert get_local_layer_num("attention.weight") is None - assert get_local_layer_num("layers.abc.weight") is None - assert get_local_layer_num("layers.") is None - - def test_get_global_layer_num_pp(self): - """Test get_global_layer_num with simple pipeline configuration.""" - mock_cfg = Mock() - mock_cfg.num_layers = 10 - mock_cfg.num_layers_in_first_pipeline_stage = 4 - mock_cfg.num_layers_in_last_pipeline_stage = 3 - - with patch( - "nemo_rl.models.megatron.converters.common.parallel_state" - ) as mock_ps: - mock_ps.get_pipeline_model_parallel_rank.return_value = 1 - mock_ps.get_pipeline_model_parallel_world_size.return_value = 3 - - result = get_global_layer_num("layers.2.weight", mock_cfg) - assert result == 6 - - -class TestExpertNumberFunctions: - """Test functions related to expert number extraction and conversion.""" - - def test_get_local_expert_num_valid(self): - """Test get_local_expert_num with valid expert keys.""" - assert 
get_local_expert_num("layers.0.mlp.experts.weight2") == 2 - assert get_local_expert_num("decoder.layers.1.experts.weight5") == 5 - assert get_local_expert_num("model.layers.0.experts.weight0") == 0 - - def test_get_local_expert_num_invalid(self): - """Test get_local_expert_num with invalid expert keys.""" - assert get_local_expert_num("layers.0.mlp.weight") is None - assert get_local_expert_num("layers.0.mlp.experts.2._extra_state") is None - - def test_get_global_expert_num(self): - """Test get_global_expert_num with expert parallel configuration.""" - mock_cfg = Mock() - mock_cfg.num_moe_experts = 8 - - with patch( - "nemo_rl.models.megatron.converters.common.parallel_state" - ) as mock_ps: - mock_ps.get_expert_model_parallel_rank.return_value = 1 - mock_ps.get_expert_model_parallel_world_size.return_value = 2 - - result = get_global_expert_num("layers.0.mlp.experts.weight2", mock_cfg) - assert result == 6 # 8 // 2 + 2 - - -class TestKeyConversionFunctions: - """Test functions related to key conversion between local and global.""" - - def test_get_global_key_from_local_key_layer_only(self): - """Test key conversion with only layer numbers.""" - mock_cfg = Mock() - mock_cfg.num_layers = 12 - mock_cfg.num_layers_in_first_pipeline_stage = None - mock_cfg.num_layers_in_last_pipeline_stage = None - - with patch( - "nemo_rl.models.megatron.converters.common.parallel_state" - ) as mock_ps: - mock_ps.get_pipeline_model_parallel_rank.return_value = 1 - mock_ps.get_pipeline_model_parallel_world_size.return_value = 2 - - result = get_global_key_from_local_key( - "layers.3.attention.weight", mock_cfg - ) - assert result == "layers.9.attention.weight" - - def test_get_global_key_from_local_key_expert_and_layer(self): - """Test key conversion with only expert numbers.""" - mock_cfg = Mock() - mock_cfg.num_moe_experts = 8 - mock_cfg.num_layers = 12 - mock_cfg.num_layers_in_first_pipeline_stage = None - mock_cfg.num_layers_in_last_pipeline_stage = None - - with patch( - 
"nemo_rl.models.megatron.converters.common.parallel_state" - ) as mock_ps: - mock_ps.get_expert_model_parallel_rank.return_value = 1 - mock_ps.get_expert_model_parallel_world_size.return_value = 2 - - mock_ps.get_pipeline_model_parallel_rank.return_value = 1 - mock_ps.get_pipeline_model_parallel_world_size.return_value = 3 - - result = get_global_key_from_local_key( - "layers.0.mlp.experts.weight2", mock_cfg - ) - assert result == "layers.4.mlp.experts.weight6" - - -class TestTensorSplittingFunctions: - """Test functions related to tensor splitting operations.""" - - def test_split_fc1_tp(self): - """Test split_fc1_tp function.""" - mock_ctx = Mock() - mock_ctx.source.config.tensor_model_parallel_size = 2 - - # Create a tensor with shape (4, 10) representing 2 TP ranks with 2 components each - linear_fc1 = torch.randn(4, 10) - - gate_proj, up_proj = split_fc1_tp(mock_ctx, linear_fc1) - - assert gate_proj.shape == (2, 10) - assert up_proj.shape == (2, 10) - assert torch.allclose(gate_proj, linear_fc1[::2]) - assert torch.allclose(up_proj, linear_fc1[1::2]) - - def test_split_fc1_etp(self): - """Test split_fc1_etp function.""" - mock_ctx = Mock() - mock_ctx.source.config.expert_tensor_parallel_size = 2 - - # Create a tensor with shape (4, 10) representing 2 ETP ranks with 2 components each - linear_fc1 = torch.randn(4, 10) - - gate_proj, up_proj = split_fc1_etp(mock_ctx, linear_fc1) - - assert gate_proj.shape == (2, 10) - assert up_proj.shape == (2, 10) - assert torch.allclose(gate_proj, linear_fc1[::2]) - assert torch.allclose(up_proj, linear_fc1[1::2]) - - def test_split_qkv_gpu(self): - """Test split_qkv_gpu function.""" - mock_ctx = Mock() - mock_ctx.source.config.num_attention_heads = 8 - mock_ctx.source.config.num_query_groups = 2 - mock_ctx.source.config.kv_channels = 16 - - # Create QKV tensor: (heads + 2*groups) * head_size * hidden_size - qkv_total_dim = 8 + 2 * 2 # 12 - linear_qkv = torch.randn(qkv_total_dim, 16, 64) - - q_proj, k_proj, v_proj = 
split_qkv_gpu(mock_ctx, linear_qkv) - - # Q should have 8 heads * 16 channels = 128 - assert q_proj.shape == (128, 64) - # K and V should have 2 groups * 16 channels = 32 each - assert k_proj.shape == (32, 64) - assert v_proj.shape == (32, 64) - - def test_split_qkv_bias_gpu(self): - """Test split_qkv_bias_gpu function.""" - mock_ctx = Mock() - mock_ctx.source.config.num_attention_heads = 8 - mock_ctx.source.config.num_query_groups = 2 - mock_ctx.source.config.kv_channels = 16 - - # Create QKV bias tensor: (heads + 2*groups) * head_size - qkv_total_dim = 8 + 2 * 2 # 12 - qkv_bias = torch.randn(qkv_total_dim, 16) - - q_bias, k_bias, v_bias = split_qkv_bias_gpu(mock_ctx, qkv_bias) - - # Q should have 8 heads * 16 channels = 128 - assert q_bias.shape == (128,) - # K and V should have 2 groups * 16 channels = 32 each - assert k_bias.shape == (32,) - assert v_bias.shape == (32,) - - -class TestTransformUpdateFunctions: - """Test functions related to transform updates.""" - - def test_update_transforms_for_nemorl(self): - """Test update_transforms_for_nemorl function.""" - # Create mock transforms - mock_transform1 = Mock() - mock_transform1.transform.__name__ = "split_fc1" - mock_transform1.source_key = "layers.0.mlp.experts.0.linear_fc1.weight" - - mock_transform2 = Mock() - mock_transform2.transform.__name__ = "split_fc1" - mock_transform2.source_key = "layers.0.mlp.shared_experts.linear_fc1.weight" - - mock_transform3 = Mock() - mock_transform3.transform.__name__ = "split_qkv" - - mock_transform4 = Mock() - mock_transform4.transform.__name__ = "split_qkv_bias" - - transforms = [ - mock_transform1, - mock_transform2, - mock_transform3, - mock_transform4, - ] - - updated_transforms = update_transforms_for_nemorl(transforms) - - # Check that expert transforms use split_fc1_etp - assert updated_transforms[0].transform == split_fc1_etp - # Check that non-expert transforms use split_fc1_tp - assert updated_transforms[1].transform == split_fc1_tp - # Check that qkv 
transforms are updated - assert updated_transforms[2].transform == split_qkv_gpu - assert updated_transforms[3].transform == split_qkv_bias_gpu diff --git a/tests/unit/models/megatron/converters/test_qwen_conversion.py b/tests/unit/models/megatron/converters/test_qwen_conversion.py deleted file mode 100644 index 2debebaee0..0000000000 --- a/tests/unit/models/megatron/converters/test_qwen_conversion.py +++ /dev/null @@ -1,284 +0,0 @@ -# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import socket -from contextlib import contextmanager -from tempfile import TemporaryDirectory - -import pytest -import torch -import torch.distributed as dist -from transformers import AutoConfig, AutoModelForCausalLM - - -@contextmanager -def temporary_distributed_context(): - if "MASTER_ADDR" in os.environ and "MASTER_PORT" in os.environ: - init_method = None - else: - # Find an available port dynamically - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(("localhost", 0)) - addr, port = s.getsockname() - - init_method = f"tcp://{addr}:{port}" - - dist.init_process_group( - backend="gloo", init_method=init_method, world_size=1, rank=0 - ) - - from megatron.core import parallel_state - - parallel_state.initialize_model_parallel() - - from megatron.core.tensor_parallel.random import model_parallel_cuda_manual_seed - - model_parallel_cuda_manual_seed(42) - - try: - yield - finally: - parallel_state.destroy_model_parallel() - dist.destroy_process_group() - - -def dummy_qwen3_megatron_moe_config(): - from nemo.collections.llm.gpt.model.qwen3 import Qwen3MoEConfig - - return Qwen3MoEConfig( - num_layers=2, - hidden_size=64, - num_attention_heads=4, - num_query_groups=2, - ffn_hidden_size=128, - moe_ffn_hidden_size=32, - num_moe_experts=2, - share_embeddings_and_output_weights=True, - kv_channels=16, - ) - - -def dummy_qwen3_megatron_dense_config(): - from nemo.collections.llm.gpt.model.qwen3 import Qwen3Config - - return Qwen3Config( - num_layers=2, - hidden_size=64, - num_attention_heads=4, - num_query_groups=2, - ffn_hidden_size=128, - share_embeddings_and_output_weights=False, - kv_channels=16, - ) - - -def create_dummy_hf_moe_config(): - """Create a dummy HF MoE config and save it to a temporary directory.""" - # Create a minimal HF config that matches the megatron config - hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-30B-A3B", trust_remote_code=True) - - # Update config to match our dummy megatron config - 
hf_config.num_hidden_layers = 2 - hf_config.hidden_size = 64 - hf_config.num_attention_heads = 4 - hf_config.num_key_value_heads = 2 - hf_config.intermediate_size = 128 - hf_config.moe_intermediate_size = 32 - hf_config.num_experts = 2 - hf_config.tie_word_embeddings = True - hf_config.head_dim = 16 - - return hf_config - - -def create_dummy_hf_dense_config(): - """Create a dummy HF dense config and save it to a temporary directory.""" - # Create a minimal HF config that matches the megatron config - hf_config = AutoConfig.from_pretrained("Qwen/Qwen3-4B", trust_remote_code=True) - - # Update config to match our dummy megatron config - hf_config.num_hidden_layers = 2 - hf_config.hidden_size = 64 - hf_config.num_attention_heads = 4 - hf_config.num_key_value_heads = 2 - hf_config.intermediate_size = 128 - hf_config.tie_word_embeddings = False - hf_config.head_dim = 16 - - return hf_config - - -def create_model_and_converter(megatron_config, hf_config, model_name): - """Create megatron model and converter for testing.""" - - from nemo.collections.llm.gpt.model.qwen3 import Qwen3Model - - from nemo_rl.models.megatron.converters.common import MegatronToHFConverter - - # Create megatron model - model = Qwen3Model(megatron_config) - model.configure_model() - - # Create dummy HF config and save to temporary directory - with TemporaryDirectory() as tmp_dir: - hf_dir = os.path.join(tmp_dir, model_name) - hf_config.save_pretrained(hf_dir) - - # Create a dummy HF model to get the model class - dummy_model = AutoModelForCausalLM.from_config( - hf_config, trust_remote_code=True - ) - dummy_model.save_pretrained(hf_dir) - - original_state_dict = model.module.state_dict() - - converter = MegatronToHFConverter( - hf_model_name=hf_dir, - megatron_model=model.module, - ) - - converted_state_dict = converter.convert(original_state_dict, model.config) - - # Filter out _extra_state keys - original_state_dict = { - k: v for k, v in original_state_dict.items() if "_extra_state" not in k - 
} - - return original_state_dict, converted_state_dict, hf_config, model - - -def calculate_chunk_sizes(hf_config): - """Calculate chunk sizes for QKV tensor splitting.""" - q_chunk_size = hf_config.head_dim * ( - hf_config.num_attention_heads // hf_config.num_key_value_heads - ) - kv_chunk_size = hf_config.head_dim * 2 - return q_chunk_size, kv_chunk_size - - -def assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size -): - """Assert that attention tensors match between original and converted state dicts.""" - # Check q_layernorm - torch.testing.assert_close( - original_state_dict["decoder.layers.0.self_attention.q_layernorm.weight"], - converted_state_dict["model.layers.0.self_attn.q_norm.weight"], - ) - - # Check first layer q_proj - torch.testing.assert_close( - original_state_dict["decoder.layers.0.self_attention.linear_qkv.weight"][ - :q_chunk_size - ], - converted_state_dict["model.layers.0.self_attn.q_proj.weight"][:q_chunk_size], - ) - - # Check second layer q_proj - torch.testing.assert_close( - original_state_dict["decoder.layers.1.self_attention.linear_qkv.weight"][ - (q_chunk_size + kv_chunk_size) : (2 * q_chunk_size + kv_chunk_size) - ], - converted_state_dict["model.layers.1.self_attn.q_proj.weight"][ - q_chunk_size : (2 * q_chunk_size) - ], - ) - - -@pytest.mark.mcore -def test_conversion_to_hf_moe(): - """Test conversion of Qwen3 MoE model to HF format.""" - with temporary_distributed_context(): - mcore_config = dummy_qwen3_megatron_moe_config() - hf_config = create_dummy_hf_moe_config() - - original_state_dict, converted_state_dict, hf_config, model = ( - create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-moe") - ) - - # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys - # taking into account the qkv merging and the merging of the up and gate projections - assert 
len(original_state_dict) == len(converted_state_dict) - ( - 2 * hf_config.num_hidden_layers - + (hf_config.num_hidden_layers * hf_config.num_experts) - ) - - q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) - - # Check attention tensors - assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size - ) - - # Check MoE MLP tensors - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ - mcore_config.moe_ffn_hidden_size : - ], - converted_state_dict["model.layers.1.mlp.experts.0.up_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.experts.linear_fc1.weight0"][ - : mcore_config.moe_ffn_hidden_size - ], - converted_state_dict["model.layers.1.mlp.experts.0.gate_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.0.mlp.experts.linear_fc2.weight1"], - converted_state_dict["model.layers.0.mlp.experts.1.down_proj.weight"], - ) - - -@pytest.mark.mcore -def test_conversion_to_hf_dense(): - """Test conversion of Qwen3 dense model to HF format.""" - with temporary_distributed_context(): - mcore_config = dummy_qwen3_megatron_dense_config() - hf_config = create_dummy_hf_dense_config() - - original_state_dict, converted_state_dict, hf_config, model = ( - create_model_and_converter(mcore_config, hf_config, "Qwen3-tiny-test-dense") - ) - - # Check that the number of keys in the original state dict is equal to the number of keys in the converted state dict minus the number of extra state keys - # taking into account the qkv merging and the merging of the up and gate projections - assert len(original_state_dict) == len(converted_state_dict) - ( - 3 * hf_config.num_hidden_layers - ) - - q_chunk_size, kv_chunk_size = calculate_chunk_sizes(hf_config) - - # Check attention tensors - assert_attention_tensors_match( - original_state_dict, converted_state_dict, q_chunk_size, kv_chunk_size - ) - - # Check dense 
MLP tensors - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ - mcore_config.ffn_hidden_size : - ], - converted_state_dict["model.layers.1.mlp.up_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.1.mlp.linear_fc1.weight"][ - : mcore_config.ffn_hidden_size - ], - converted_state_dict["model.layers.1.mlp.gate_proj.weight"], - ) - torch.testing.assert_close( - original_state_dict["decoder.layers.0.mlp.linear_fc2.weight"], - converted_state_dict["model.layers.0.mlp.down_proj.weight"], - ) diff --git a/tools/refit_verifier.py b/tools/refit_verifier.py new file mode 100644 index 0000000000..67321beb4b --- /dev/null +++ b/tools/refit_verifier.py @@ -0,0 +1,618 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Refitted Policy Comparison Script. + +This script compares logprobs between a Megatron policy and a vLLM policy +after performing model weight refitting. It demonstrates the workflow for +getting consistent logprobs across different inference backends. + +Usage: + uv run --extra mcore python3 tools/refit_verifier.py --model_name /path/to/model + + +Example Output: + +--- Comparing Logprobs --- + +Input prompt: The following are multiple choice questions (with answers) about world religions. + +When was the first Buddhist temple constructed in Japan? +A. 325 CE +B. 119 CE +C. 451 CE +D. 
596 CE +Answer: +Input tokens: tensor([200000, 954, 2182, 583, 6146, 9031, 5808, 330, 5992, + 8860, 21, 1509, 3817, 99867, 1574, 7022, 812, 290, + 1660, 120819, 55594, 24043, 310, 11197, 1044, 45, 26, + 220, 23325, 13607, 198, 46, 26, 220, 12860, 13607, + 198, 47, 26, 220, 34518, 13607, 198, 48, 26, + 220, 43145, 13607, 198, 4984, 38]) + +Comparing 10 generated tokens (from position 51 to 60): +vLLM generated logprobs: tensor([-7.0227, -7.1559, -6.4603, -6.7419, -6.3026, -6.8391, -6.3128, -6.6454, + -7.1514, -6.8304]) +Megatron generated logprobs: tensor([-7.0225, -7.1873, -6.4600, -6.7418, -6.3027, -6.8704, -6.2502, -6.6453, + -7.1518, -6.8304]) +Absolute difference: tensor([2.0981e-04, 3.1348e-02, 2.6035e-04, 1.6689e-04, 1.4973e-04, 3.1272e-02, + 6.2590e-02, 1.7643e-04, 3.2902e-04, 4.1485e-05]) +Mean absolute difference: 0.012654399499297142 +Max absolute difference: 0.06259012222290039 + +--- Token-by-Token Comparison (Generated Tokens Only) --- +Token Token ID Position vLLM Megatron Diff +--------------------------------------------------------------------------- +tok_51 pos_51 51 -7.022674 -7.022464 0.000210 +tok_52 pos_52 52 -7.155923 -7.187271 0.031348 +tok_53 pos_53 53 -6.460307 -6.460047 0.000260 +tok_54 pos_54 54 -6.741926 -6.741759 0.000167 +tok_55 pos_55 55 -6.302569 -6.302719 0.000150 +tok_56 pos_56 56 -6.839099 -6.870371 0.031272 +tok_57 pos_57 57 -6.312774 -6.250184 0.062590 +tok_58 pos_58 58 -6.645445 -6.645269 0.000176 +tok_59 pos_59 59 -7.151441 -7.151770 0.000329 +tok_60 pos_60 60 -6.830355 -6.830397 0.000041 +""" + +import argparse +import copy + +import ray +import torch +from transformers import AutoTokenizer + +from nemo_rl.algorithms.grpo import refit_policy_generation +from nemo_rl.distributed.batched_data_dict import BatchedDataDict +from nemo_rl.distributed.virtual_cluster import RayVirtualCluster +from nemo_rl.models.generation import configure_generation_config +from nemo_rl.models.generation.vllm import VllmGeneration +from 
nemo_rl.models.policy.lm_policy import Policy + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Compare Megatron and vLLM policy logprobs after refitting" + ) + + parser.add_argument( + "--model_name", + type=str, + default="/root/checkpoints/llama4-scout-custom-init", + help="Path to the model checkpoint", + ) + parser.add_argument( + "--tp_size", + type=int, + default=1, + help="Tensor parallelism size (TP) for Megatron", + ) + parser.add_argument( + "--ep_size", + type=int, + default=1, + help="Expert parallelism size (EP) for Megatron", + ) + parser.add_argument( + "--pp_size", + type=int, + default=1, + help="Pipeline parallelism size (PP) for Megatron", + ) + parser.add_argument( + "--max_new_tokens", + type=int, + default=10, + help="Maximum number of new tokens to generate", + ) + parser.add_argument( + "--max_sequence_length", + type=int, + default=256, + help="Maximum total sequence length", + ) + parser.add_argument( + "--refit_buffer_size_gb", type=int, default=4, help="Refit buffer size in GB" + ) + parser.add_argument( + "--prompt", + type=str, + default="Here is a short introduction to me:", + help="Input prompt for generation", + ) + + return parser.parse_args() + + +def setup_configs(args, tokenizer): + """Setup configuration dictionaries for Megatron and vLLM. 
+ + Args: + args: Parsed command line arguments + tokenizer: HuggingFace tokenizer + + Returns: + tuple: (megatron_config, vllm_config) + """ + # Megatron Configuration + megatron_config = { + "model_name": args.model_name, + "training_backend": "megatron", + "train_global_batch_size": 1, + "train_micro_batch_size": 1, + "generation_batch_size": 2, + "learning_rate": 0.0001, + "logprob_batch_size": 1, + "generation": { + "max_total_sequence_length": args.max_sequence_length, + "max_new_tokens": args.max_sequence_length, + "do_sample": False, + "pad_token_id": tokenizer.eos_token_id, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + }, + "precision": "bfloat16", + "pipeline_dtype": "bfloat16", + "parallel_output": True, + "max_total_sequence_length": args.max_sequence_length, + "fsdp_offload_enabled": False, + "max_grad_norm": 1.0, + "refit_buffer_size_gb": args.refit_buffer_size_gb, + "make_sequence_length_divisible_by": args.tp_size, + "optimizer": { + "type": "adam", + "kwargs": { + "lr": 0.0001, + "weight_decay": 0.0, + "eps": 1e-8, + }, + }, + "dtensor_cfg": { + "enabled": False, + }, + "dynamic_batching": { + "enabled": False, + "train_mb_tokens": 256, + "logprob_mb_tokens": 256, + "sequence_length_round": 64, + }, + "sequence_packing": { + "enabled": False, + }, + "megatron_cfg": { + "enabled": True, + "empty_unused_memory_level": 1, + "tensor_model_parallel_size": args.tp_size, + "sequence_parallel": False, + "expert_tensor_parallel_size": args.tp_size, + "expert_model_parallel_size": args.ep_size, + "pipeline_model_parallel_size": args.pp_size, + "context_parallel_size": 1, + "num_layers_in_first_pipeline_stage": None, + "num_layers_in_last_pipeline_stage": None, + "activation_checkpointing": False, + "moe_router_dtype": "fp64", + "moe_router_load_balancing_type": "none", + "moe_router_bias_update_rate": 0.0, + "pipeline_dtype": "bfloat16", + "freeze_moe_router": False, + "apply_rope_fusion": 
False, + "optimizer": { + "optimizer": "adam", + "lr": 5.0e-6, + "min_lr": 5.0e-7, + "weight_decay": 0.01, + "bf16": False, + "fp16": False, + "params_dtype": "float32", + # Adam optimizer settings + "adam_beta1": 0.9, + "adam_beta2": 0.999, + "adam_eps": 1e-8, + # SGD optimizer settings + "sgd_momentum": 0.9, + # Distributed optimizer settings + "use_distributed_optimizer": True, + "use_precision_aware_optimizer": True, + "clip_grad": 1.0, + }, + "scheduler": { + "start_weight_decay": 0.01, + "end_weight_decay": 0.01, + "weight_decay_incr_style": "constant", + "lr_decay_style": "constant", + "lr_decay_iters": None, + "lr_warmup_iters": 50, + "lr_warmup_init": 5.0e-7, + }, + "distributed_data_parallel_config": { + "grad_reduce_in_fp32": False, + "overlap_grad_reduce": False, + "overlap_param_gather": False, + "average_in_collective": False, + "use_custom_fsdp": False, + "data_parallel_sharding_strategy": "optim_grads_params", + }, + }, + } + + # vLLM Configuration + vllm_tp = args.tp_size * args.ep_size * args.pp_size + vllm_config = { + "backend": "vllm", + "model_name": args.model_name, + "tokenizer": { + "name": args.model_name, + }, + "dtype": "bfloat16", + "max_new_tokens": args.max_new_tokens, + "temperature": 1.0, + "top_p": 1.0, + "top_k": None, + "stop_token_ids": None, + "stop_strings": None, + "vllm_cfg": { + "tensor_parallel_size": vllm_tp, + "pipeline_parallel_size": 1, + "gpu_memory_utilization": 0.6, + "max_model_len": args.max_sequence_length, + "precision": "bfloat16", + "async_engine": False, + "skip_tokenizer_init": False, + "load_format": "dummy", + "enforce_eager": "False", + }, + "colocated": { + "enabled": True, + "resources": { + "gpus_per_node": None, + "num_nodes": None, + }, + }, + "vllm_kwargs": {}, + } + + # Configure vLLM with tokenizer + vllm_config = configure_generation_config(vllm_config, tokenizer) + + return megatron_config, vllm_config + + +def setup_clusters_and_policies(args, megatron_config, vllm_config, tokenizer): + 
"""Setup Ray clusters and initialize policies. + + Args: + args: Parsed command line arguments + megatron_config: Megatron configuration dictionary + vllm_config: vLLM configuration dictionary + tokenizer: HuggingFace tokenizer + + Returns: + tuple: (megatron_cluster, policy, vllm_inference_policy) + """ + gpus_per_node = args.tp_size * args.ep_size * args.pp_size + print(f"Setting up Megatron Cluster with TP={gpus_per_node}") + megatron_cluster = RayVirtualCluster( + name="megatron_cluster", + bundle_ct_per_node_list=[gpus_per_node], + use_gpus=True, + num_gpus_per_node=gpus_per_node, + max_colocated_worker_groups=2, + ) + + print("Instantiating Policy with Megatron backend...") + policy = Policy( + cluster=megatron_cluster, + config=megatron_config, + tokenizer=tokenizer, + init_reference_model=False, + init_optimizer=False, + ) + + # Create vLLM inference configuration with limited generation + vllm_inference_config = vllm_config.copy() + vllm_inference_config["max_new_tokens"] = args.max_new_tokens + vllm_inference_config = configure_generation_config( + vllm_inference_config, tokenizer + ) + + # Create vLLM policy for inference-only logprobs + vllm_inference_policy = VllmGeneration( + cluster=megatron_cluster, config=vllm_inference_config + ) + + return megatron_cluster, policy, vllm_inference_policy + + +def prepare_input_data(prompt, tokenizer): + """Tokenize the input prompt and prepare generation data. 
+ + Args: + prompt: Input text prompt + tokenizer: HuggingFace tokenizer + + Returns: + BatchedDataDict: Prepared input data + """ + print("Preparing input data...") + + # Tokenize the prompt + tokenized = tokenizer( + [prompt], + padding=True, + truncation=True, + return_tensors="pt", + padding_side="right", + ) + + # Calculate input lengths from attention mask + input_ids = tokenized["input_ids"] + attention_mask = tokenized["attention_mask"] + input_lengths = attention_mask.sum(dim=1).to(torch.int32) + + generation_data = BatchedDataDict( + { + "input_ids": input_ids, + "input_lengths": input_lengths, + } + ) + + return generation_data + + +def run_model_refitting(policy, vllm_inference_policy, refit_buffer_size_gb): + """Perform model weight refitting between Megatron and vLLM policies. + + Args: + policy: Megatron policy + vllm_inference_policy: vLLM inference policy + refit_buffer_size_gb: Buffer size for refitting in GB + """ + print("\n--- Performing Model Refitting ---") + + # Perform the refitting between policies using GRPO's refit function + # Note: colocated_inference=True since we're using the same cluster + refit_policy_generation( + policy, + vllm_inference_policy, + colocated_inference=True, + _refit_buffer_size_gb=refit_buffer_size_gb, + ) + print("Model refitting completed") + + +def generate_and_compare_logprobs(policy, vllm_inference_policy, generation_data): + """Generate outputs and compare logprobs between vLLM and Megatron policies. 
+ + Args: + policy: Megatron policy + vllm_inference_policy: vLLM inference policy + generation_data: Input data for generation + + Returns: + tuple: (vllm_logprobs_data, megatron_generation_data) + """ + # Generate with vLLM for logprobs + print("\n--- Getting vLLM Policy Logprobs ---") + vllm_logprobs_data = vllm_inference_policy.generate(generation_data, greedy=True) + print(f"vLLM Logprobs shape: {vllm_logprobs_data['logprobs'].shape}") + print(f"vLLM Logprobs sample: {vllm_logprobs_data['logprobs'][0, -10:]}") + + # Generate with Megatron policy + print("\n--- Getting Megatron Generation ---") + policy.prepare_for_generation() + + # Prepare input data for Megatron using vLLM outputs + megatron_input_data = copy.deepcopy(generation_data) + print("=" * 100) + print(megatron_input_data) + print(vllm_logprobs_data) + megatron_input_data["input_ids"] = vllm_logprobs_data["output_ids"] + megatron_input_data["input_lengths"] = vllm_logprobs_data[ + "unpadded_sequence_lengths" + ] + + # Get logprobs from Megatron + policy.prepare_for_lp_inference() + megatron_generation_data = policy.get_logprobs(megatron_input_data) + print(f"Megatron Generation shape: {megatron_generation_data['logprobs'].shape}") + print( + f"Megatron Generation sample: {megatron_generation_data['logprobs'][0, -10:]}" + ) + + return vllm_logprobs_data, megatron_generation_data + + +def analyze_logprob_differences( + vllm_logprobs_data, megatron_generation_data, generation_data, tokenizer, prompt +): + """Analyze and display differences between vLLM and Megatron logprobs. 
def analyze_logprob_differences(
    vllm_logprobs_data,
    megatron_generation_data,
    generation_data,
    tokenizer,
    prompt,
):
    """Compare vLLM and Megatron logprobs for the generated portion of one sample.

    Prints both backends' logprobs over the generated tokens along with their
    elementwise absolute difference (plus mean/max summaries), then delegates to
    ``_detailed_token_comparison`` for a per-token table.

    Args:
        vllm_logprobs_data: vLLM generation results
        megatron_generation_data: Megatron generation results
        generation_data: Original input data
        tokenizer: HuggingFace tokenizer
        prompt: Original input prompt
    """
    print("\n--- Comparing Logprobs ---")
    print(f"Input prompt: {prompt}")
    print(
        f"Input tokens: {generation_data['input_ids'][0, : generation_data['input_lengths'][0]]}"
    )

    # Extract generation parameters
    input_length = generation_data["input_lengths"][0].item()
    total_length = vllm_logprobs_data["logprobs"].shape[1]
    generated_length = vllm_logprobs_data["generation_lengths"][0].item()

    if generated_length > 0:
        print(
            f"\nComparing {generated_length} generated tokens (from position {input_length} to {total_length - 1}):"
        )

        # Extract generated logprobs (batch index 0; generated span only)
        vllm_gen_logprobs = vllm_logprobs_data["logprobs"][0, input_length:total_length]
        megatron_gen_logprobs = megatron_generation_data["logprobs"][
            0, input_length:total_length
        ]

        print(f"vLLM generated logprobs: {vllm_gen_logprobs}")
        print(f"Megatron generated logprobs: {megatron_gen_logprobs}")

        # Calculate and display differences
        abs_diff = torch.abs(vllm_gen_logprobs - megatron_gen_logprobs)
        print(f"Absolute difference: {abs_diff}")
        print(f"Mean absolute difference: {torch.mean(abs_diff)}")
        print(f"Max absolute difference: {torch.max(abs_diff)}")

        # Detailed token-by-token comparison
        _detailed_token_comparison(
            vllm_gen_logprobs,
            megatron_gen_logprobs,
            vllm_logprobs_data,
            input_length,
            total_length,
            tokenizer,
        )
    else:
        print(
            f"No generated tokens to compare (input_length: {input_length}, total_length: {total_length})"
        )


def _detailed_token_comparison(
    vllm_logprobs,
    megatron_logprobs,
    vllm_logprobs_data,
    input_length,
    total_length,
    tokenizer,
):
    """Display detailed token-by-token comparison of logprobs.

    Args:
        vllm_logprobs: vLLM logprobs for generated tokens
        megatron_logprobs: Megatron logprobs for generated tokens
        vllm_logprobs_data: vLLM generation data
        input_length: Length of input sequence
        total_length: Total sequence length
        tokenizer: HuggingFace tokenizer
    """
    print("\n--- Token-by-Token Comparison (Generated Tokens Only) ---")

    # Guard clause: nothing was generated.
    if total_length <= input_length:
        print("No generated tokens to compare in detail.")
        return

    # Real token ids (and decoded text) are only available when the backend
    # returned them; otherwise we fall back to positional placeholders below.
    has_token_ids = "output_ids" in vllm_logprobs_data
    if has_token_ids:
        generated_tokens = vllm_logprobs_data["output_ids"][
            0, input_length:total_length
        ]

    # Display header
    print(
        f"{'Token':<15} {'Token ID':<10} {'Position':<10} {'vLLM':<12} {'Megatron':<12} {'Diff':<12}"
    )
    print("-" * 75)

    # Display each token comparison
    for i, pos in enumerate(range(input_length, total_length)):
        if has_token_ids:
            token_id = generated_tokens[i].item()
            token_text = tokenizer.decode([token_id])
        else:
            token_id = f"pos_{pos}"
            token_text = f"tok_{pos}"

        vllm_lp = vllm_logprobs[i].item()
        megatron_lp = megatron_logprobs[i].item()
        diff = abs(vllm_lp - megatron_lp)

        print(
            f"{token_text:<15} {token_id:<10} {pos:<10} {vllm_lp:<12.6f} {megatron_lp:<12.6f} {diff:<12.6f}"
        )


def cleanup_resources(vllm_inference_policy):
    """Clean up resources and shutdown policies.

    Args:
        vllm_inference_policy: vLLM policy to shutdown
    """
    print("\n--- Cleaning up ---")
    vllm_inference_policy.shutdown()
    print("Cleanup completed successfully!")


def main():
    """Main execution function."""
    # Parse command line arguments
    args = parse_args()

    # Initialize Ray
    ray.init()

    # Setup tokenizer
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    # Setup configurations
    megatron_config, vllm_config = setup_configs(args, tokenizer)

    # Setup clusters and policies
    megatron_cluster, policy, vllm_inference_policy = setup_clusters_and_policies(
        args, megatron_config, vllm_config, tokenizer
    )

    # Prepare input data
    generation_data = prepare_input_data(args.prompt, tokenizer)

    # Prepare refit info
    state_dict_info = policy.prepare_refit_info()
    vllm_inference_policy.prepare_refit_info(state_dict_info)

    # Perform model refitting
    run_model_refitting(policy, vllm_inference_policy, args.refit_buffer_size_gb)

    # Generate and compare logprobs
    vllm_logprobs_data, megatron_generation_data = generate_and_compare_logprobs(
        policy, vllm_inference_policy, generation_data
    )

    # Analyze differences
    analyze_logprob_differences(
        vllm_logprobs_data,
        megatron_generation_data,
        generation_data,
        tokenizer,
        args.prompt,
    )

    # Cleanup
    cleanup_resources(vllm_inference_policy)

    print("Script completed successfully!")


if __name__ == "__main__":
    main()
sys_platform != 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", ] [manifest] members = [ + "megatron-bridge", "megatron-core", "nemo-automodel", "nemo-rl", - "nemo-tron", ] [[manifest.dependency-metadata]] @@ -278,56 +284,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] -[[package]] -name = "bcrypt" -version = "4.3.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bb/5d/6d7433e0f3cd46ce0b43cd65e1db465ea024dbb8216fb2404e919c2ad77b/bcrypt-4.3.0.tar.gz", hash = "sha256:3a3fd2204178b6d2adcf09cb4f6426ffef54762577a7c9b54c159008cb288c18", size = 25697, upload-time = "2025-02-28T01:24:09.174Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/bf/2c/3d44e853d1fe969d229bd58d39ae6902b3d924af0e2b5a60d17d4b809ded/bcrypt-4.3.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f01e060f14b6b57bbb72fc5b4a83ac21c443c9a2ee708e04a10e9192f90a6281", size = 483719, upload-time = "2025-02-28T01:22:34.539Z" }, - { url = "https://files.pythonhosted.org/packages/a1/e2/58ff6e2a22eca2e2cff5370ae56dba29d70b1ea6fc08ee9115c3ae367795/bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5eeac541cefd0bb887a371ef73c62c3cd78535e4887b310626036a7c0a817bb", size = 272001, upload-time = "2025-02-28T01:22:38.078Z" }, - { url = "https://files.pythonhosted.org/packages/37/1f/c55ed8dbe994b1d088309e366749633c9eb90d139af3c0a50c102ba68a1a/bcrypt-4.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59e1aa0e2cd871b08ca146ed08445038f42ff75968c7ae50d2fdd7860ade2180", size = 277451, upload-time = "2025-02-28T01:22:40.787Z" }, - { url = "https://files.pythonhosted.org/packages/d7/1c/794feb2ecf22fe73dcfb697ea7057f632061faceb7dcf0f155f3443b4d79/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:0042b2e342e9ae3d2ed22727c1262f76cc4f345683b5c1715f0250cf4277294f", size = 272792, upload-time = "2025-02-28T01:22:43.144Z" }, - { url = "https://files.pythonhosted.org/packages/13/b7/0b289506a3f3598c2ae2bdfa0ea66969812ed200264e3f61df77753eee6d/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74a8d21a09f5e025a9a23e7c0fd2c7fe8e7503e4d356c0a2c1486ba010619f09", size = 289752, upload-time = "2025-02-28T01:22:45.56Z" }, - { url = "https://files.pythonhosted.org/packages/dc/24/d0fb023788afe9e83cc118895a9f6c57e1044e7e1672f045e46733421fe6/bcrypt-4.3.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:0142b2cb84a009f8452c8c5a33ace5e3dfec4159e7735f5afe9a4d50a8ea722d", size = 277762, upload-time = "2025-02-28T01:22:47.023Z" }, - { url = 
"https://files.pythonhosted.org/packages/e4/38/cde58089492e55ac4ef6c49fea7027600c84fd23f7520c62118c03b4625e/bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:12fa6ce40cde3f0b899729dbd7d5e8811cb892d31b6f7d0334a1f37748b789fd", size = 272384, upload-time = "2025-02-28T01:22:49.221Z" }, - { url = "https://files.pythonhosted.org/packages/de/6a/d5026520843490cfc8135d03012a413e4532a400e471e6188b01b2de853f/bcrypt-4.3.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:5bd3cca1f2aa5dbcf39e2aa13dd094ea181f48959e1071265de49cc2b82525af", size = 277329, upload-time = "2025-02-28T01:22:51.603Z" }, - { url = "https://files.pythonhosted.org/packages/b3/a3/4fc5255e60486466c389e28c12579d2829b28a527360e9430b4041df4cf9/bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:335a420cfd63fc5bc27308e929bee231c15c85cc4c496610ffb17923abf7f231", size = 305241, upload-time = "2025-02-28T01:22:53.283Z" }, - { url = "https://files.pythonhosted.org/packages/c7/15/2b37bc07d6ce27cc94e5b10fd5058900eb8fb11642300e932c8c82e25c4a/bcrypt-4.3.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:0e30e5e67aed0187a1764911af023043b4542e70a7461ad20e837e94d23e1d6c", size = 309617, upload-time = "2025-02-28T01:22:55.461Z" }, - { url = "https://files.pythonhosted.org/packages/5f/1f/99f65edb09e6c935232ba0430c8c13bb98cb3194b6d636e61d93fe60ac59/bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:3b8d62290ebefd49ee0b3ce7500f5dbdcf13b81402c05f6dafab9a1e1b27212f", size = 335751, upload-time = "2025-02-28T01:22:57.81Z" }, - { url = "https://files.pythonhosted.org/packages/00/1b/b324030c706711c99769988fcb694b3cb23f247ad39a7823a78e361bdbb8/bcrypt-4.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2ef6630e0ec01376f59a006dc72918b1bf436c3b571b80fa1968d775fa02fe7d", size = 355965, upload-time = "2025-02-28T01:22:59.181Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/dd/20372a0579dd915dfc3b1cd4943b3bca431866fcb1dfdfd7518c3caddea6/bcrypt-4.3.0-cp313-cp313t-win32.whl", hash = "sha256:7a4be4cbf241afee43f1c3969b9103a41b40bcb3a3f467ab19f891d9bc4642e4", size = 155316, upload-time = "2025-02-28T01:23:00.763Z" }, - { url = "https://files.pythonhosted.org/packages/6d/52/45d969fcff6b5577c2bf17098dc36269b4c02197d551371c023130c0f890/bcrypt-4.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c1949bf259a388863ced887c7861da1df681cb2388645766c89fdfd9004c669", size = 147752, upload-time = "2025-02-28T01:23:02.908Z" }, - { url = "https://files.pythonhosted.org/packages/11/22/5ada0b9af72b60cbc4c9a399fdde4af0feaa609d27eb0adc61607997a3fa/bcrypt-4.3.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:f81b0ed2639568bf14749112298f9e4e2b28853dab50a8b357e31798686a036d", size = 498019, upload-time = "2025-02-28T01:23:05.838Z" }, - { url = "https://files.pythonhosted.org/packages/b8/8c/252a1edc598dc1ce57905be173328eda073083826955ee3c97c7ff5ba584/bcrypt-4.3.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:864f8f19adbe13b7de11ba15d85d4a428c7e2f344bac110f667676a0ff84924b", size = 279174, upload-time = "2025-02-28T01:23:07.274Z" }, - { url = "https://files.pythonhosted.org/packages/29/5b/4547d5c49b85f0337c13929f2ccbe08b7283069eea3550a457914fc078aa/bcrypt-4.3.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e36506d001e93bffe59754397572f21bb5dc7c83f54454c990c74a468cd589e", size = 283870, upload-time = "2025-02-28T01:23:09.151Z" }, - { url = "https://files.pythonhosted.org/packages/be/21/7dbaf3fa1745cb63f776bb046e481fbababd7d344c5324eab47f5ca92dd2/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:842d08d75d9fe9fb94b18b071090220697f9f184d4547179b60734846461ed59", size = 279601, upload-time = "2025-02-28T01:23:11.461Z" }, - { url = 
"https://files.pythonhosted.org/packages/6d/64/e042fc8262e971347d9230d9abbe70d68b0a549acd8611c83cebd3eaec67/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:7c03296b85cb87db865d91da79bf63d5609284fc0cab9472fdd8367bbd830753", size = 297660, upload-time = "2025-02-28T01:23:12.989Z" }, - { url = "https://files.pythonhosted.org/packages/50/b8/6294eb84a3fef3b67c69b4470fcdd5326676806bf2519cda79331ab3c3a9/bcrypt-4.3.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:62f26585e8b219cdc909b6a0069efc5e4267e25d4a3770a364ac58024f62a761", size = 284083, upload-time = "2025-02-28T01:23:14.5Z" }, - { url = "https://files.pythonhosted.org/packages/62/e6/baff635a4f2c42e8788fe1b1633911c38551ecca9a749d1052d296329da6/bcrypt-4.3.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:beeefe437218a65322fbd0069eb437e7c98137e08f22c4660ac2dc795c31f8bb", size = 279237, upload-time = "2025-02-28T01:23:16.686Z" }, - { url = "https://files.pythonhosted.org/packages/39/48/46f623f1b0c7dc2e5de0b8af5e6f5ac4cc26408ac33f3d424e5ad8da4a90/bcrypt-4.3.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:97eea7408db3a5bcce4a55d13245ab3fa566e23b4c67cd227062bb49e26c585d", size = 283737, upload-time = "2025-02-28T01:23:18.897Z" }, - { url = "https://files.pythonhosted.org/packages/49/8b/70671c3ce9c0fca4a6cc3cc6ccbaa7e948875a2e62cbd146e04a4011899c/bcrypt-4.3.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:191354ebfe305e84f344c5964c7cd5f924a3bfc5d405c75ad07f232b6dffb49f", size = 312741, upload-time = "2025-02-28T01:23:21.041Z" }, - { url = "https://files.pythonhosted.org/packages/27/fb/910d3a1caa2d249b6040a5caf9f9866c52114d51523ac2fb47578a27faee/bcrypt-4.3.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:41261d64150858eeb5ff43c753c4b216991e0ae16614a308a15d909503617732", size = 316472, upload-time = "2025-02-28T01:23:23.183Z" }, - { url = 
"https://files.pythonhosted.org/packages/dc/cf/7cf3a05b66ce466cfb575dbbda39718d45a609daa78500f57fa9f36fa3c0/bcrypt-4.3.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:33752b1ba962ee793fa2b6321404bf20011fe45b9afd2a842139de3011898fef", size = 343606, upload-time = "2025-02-28T01:23:25.361Z" }, - { url = "https://files.pythonhosted.org/packages/e3/b8/e970ecc6d7e355c0d892b7f733480f4aa8509f99b33e71550242cf0b7e63/bcrypt-4.3.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:50e6e80a4bfd23a25f5c05b90167c19030cf9f87930f7cb2eacb99f45d1c3304", size = 362867, upload-time = "2025-02-28T01:23:26.875Z" }, - { url = "https://files.pythonhosted.org/packages/a9/97/8d3118efd8354c555a3422d544163f40d9f236be5b96c714086463f11699/bcrypt-4.3.0-cp38-abi3-win32.whl", hash = "sha256:67a561c4d9fb9465ec866177e7aebcad08fe23aaf6fbd692a6fab69088abfc51", size = 160589, upload-time = "2025-02-28T01:23:28.381Z" }, - { url = "https://files.pythonhosted.org/packages/29/07/416f0b99f7f3997c69815365babbc2e8754181a4b1899d921b3c7d5b6f12/bcrypt-4.3.0-cp38-abi3-win_amd64.whl", hash = "sha256:584027857bc2843772114717a7490a37f68da563b3620f78a849bcb54dc11e62", size = 152794, upload-time = "2025-02-28T01:23:30.187Z" }, - { url = "https://files.pythonhosted.org/packages/6e/c1/3fa0e9e4e0bfd3fd77eb8b52ec198fd6e1fd7e9402052e43f23483f956dd/bcrypt-4.3.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0d3efb1157edebfd9128e4e46e2ac1a64e0c1fe46fb023158a407c7892b0f8c3", size = 498969, upload-time = "2025-02-28T01:23:31.945Z" }, - { url = "https://files.pythonhosted.org/packages/ce/d4/755ce19b6743394787fbd7dff6bf271b27ee9b5912a97242e3caf125885b/bcrypt-4.3.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08bacc884fd302b611226c01014eca277d48f0a05187666bca23aac0dad6fe24", size = 279158, upload-time = "2025-02-28T01:23:34.161Z" }, - { url = 
"https://files.pythonhosted.org/packages/9b/5d/805ef1a749c965c46b28285dfb5cd272a7ed9fa971f970435a5133250182/bcrypt-4.3.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6746e6fec103fcd509b96bacdfdaa2fbde9a553245dbada284435173a6f1aef", size = 284285, upload-time = "2025-02-28T01:23:35.765Z" }, - { url = "https://files.pythonhosted.org/packages/ab/2b/698580547a4a4988e415721b71eb45e80c879f0fb04a62da131f45987b96/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:afe327968aaf13fc143a56a3360cb27d4ad0345e34da12c7290f1b00b8fe9a8b", size = 279583, upload-time = "2025-02-28T01:23:38.021Z" }, - { url = "https://files.pythonhosted.org/packages/f2/87/62e1e426418204db520f955ffd06f1efd389feca893dad7095bf35612eec/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:d9af79d322e735b1fc33404b5765108ae0ff232d4b54666d46730f8ac1a43676", size = 297896, upload-time = "2025-02-28T01:23:39.575Z" }, - { url = "https://files.pythonhosted.org/packages/cb/c6/8fedca4c2ada1b6e889c52d2943b2f968d3427e5d65f595620ec4c06fa2f/bcrypt-4.3.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f1e3ffa1365e8702dc48c8b360fef8d7afeca482809c5e45e653af82ccd088c1", size = 284492, upload-time = "2025-02-28T01:23:40.901Z" }, - { url = "https://files.pythonhosted.org/packages/4d/4d/c43332dcaaddb7710a8ff5269fcccba97ed3c85987ddaa808db084267b9a/bcrypt-4.3.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:3004df1b323d10021fda07a813fd33e0fd57bef0e9a480bb143877f6cba996fe", size = 279213, upload-time = "2025-02-28T01:23:42.653Z" }, - { url = "https://files.pythonhosted.org/packages/dc/7f/1e36379e169a7df3a14a1c160a49b7b918600a6008de43ff20d479e6f4b5/bcrypt-4.3.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:531457e5c839d8caea9b589a1bcfe3756b0547d7814e9ce3d437f17da75c32b0", size = 284162, upload-time = "2025-02-28T01:23:43.964Z" }, - { url = 
"https://files.pythonhosted.org/packages/1c/0a/644b2731194b0d7646f3210dc4d80c7fee3ecb3a1f791a6e0ae6bb8684e3/bcrypt-4.3.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:17a854d9a7a476a89dcef6c8bd119ad23e0f82557afbd2c442777a16408e614f", size = 312856, upload-time = "2025-02-28T01:23:46.011Z" }, - { url = "https://files.pythonhosted.org/packages/dc/62/2a871837c0bb6ab0c9a88bf54de0fc021a6a08832d4ea313ed92a669d437/bcrypt-4.3.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6fb1fd3ab08c0cbc6826a2e0447610c6f09e983a281b919ed721ad32236b8b23", size = 316726, upload-time = "2025-02-28T01:23:47.575Z" }, - { url = "https://files.pythonhosted.org/packages/0c/a1/9898ea3faac0b156d457fd73a3cb9c2855c6fd063e44b8522925cdd8ce46/bcrypt-4.3.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e965a9c1e9a393b8005031ff52583cedc15b7884fce7deb8b0346388837d6cfe", size = 343664, upload-time = "2025-02-28T01:23:49.059Z" }, - { url = "https://files.pythonhosted.org/packages/40/f2/71b4ed65ce38982ecdda0ff20c3ad1b15e71949c78b2c053df53629ce940/bcrypt-4.3.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:79e70b8342a33b52b55d93b3a59223a844962bef479f6a0ea318ebbcadf71505", size = 363128, upload-time = "2025-02-28T01:23:50.399Z" }, - { url = "https://files.pythonhosted.org/packages/11/99/12f6a58eca6dea4be992d6c681b7ec9410a1d9f5cf368c61437e31daa879/bcrypt-4.3.0-cp39-abi3-win32.whl", hash = "sha256:b4d4e57f0a63fd0b358eb765063ff661328f69a04494427265950c71b992a39a", size = 160598, upload-time = "2025-02-28T01:23:51.775Z" }, - { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, -] - [[package]] name = "beautifulsoup4" version = "4.13.4" @@ -422,15 +378,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/ed/4d/1392562369b1139e741b30d624f09fe7091d17dd5579fae5732f044b12bb/blobfile-3.0.0-py3-none-any.whl", hash = "sha256:48ecc3307e622804bd8fe13bf6f40e6463c4439eba7a1f9ad49fd78aa63cc658", size = 75413, upload-time = "2024-08-27T00:02:51.518Z" }, ] -[[package]] -name = "braceexpand" -version = "0.1.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/93/badd4f5ccf25209f3fef2573073da9fe4a45a3da99fca2f800f942130c0f/braceexpand-0.1.7.tar.gz", hash = "sha256:e6e539bd20eaea53547472ff94f4fb5c3d3bf9d0a89388c4b56663aba765f705", size = 7777, upload-time = "2021-05-07T13:49:07.323Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fa/93/e8c04e80e82391a6e51f218ca49720f64236bc824e92152a2633b74cf7ab/braceexpand-0.1.7-py2.py3-none-any.whl", hash = "sha256:91332d53de7828103dcae5773fb43bc34950b0c8160e35e0f44c4427a3b85014", size = 5923, upload-time = "2021-05-07T13:49:05.146Z" }, -] - [[package]] name = "cachetools" version = "5.5.2" @@ -440,15 +387,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/72/76/20fa66124dbe6be5cafeb312ece67de6b61dd91a0247d1ea13db4ebb33c2/cachetools-5.5.2-py3-none-any.whl", hash = "sha256:d26a22bcc62eb95c3beabd9f1ee5e820d3d2704fe2967cbe350e20c8ffcd3f0a", size = 10080, upload-time = "2025-02-20T21:01:16.647Z" }, ] -[[package]] -name = "catalogue" -version = "2.0.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/38/b4/244d58127e1cdf04cf2dc7d9566f0d24ef01d5ce21811bab088ecc62b5ea/catalogue-2.0.10.tar.gz", hash = "sha256:4f56daa940913d3f09d589c191c74e5a6d51762b3a9e37dd53b7437afd6cda15", size = 19561, upload-time = "2023-09-25T06:29:24.962Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/96/d32b941a501ab566a16358d68b6eb4e4acc373fab3c3c4d7d9e649f7b4bb/catalogue-2.0.10-py3-none-any.whl", hash = 
"sha256:58c2de0020aa90f4a2da7dfad161bf7b3b054c86a5f09fcedc0b2b740c109a9f", size = 17325, upload-time = "2023-09-25T06:29:23.337Z" }, -] - [[package]] name = "causal-conv1d" version = "1.5.0.post8" @@ -624,6 +562,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6f/0d/a10351ef1a98e0b03d66887ec2d87c261f9a0fbff8f2bdb75614cc0a2850/colored-2.2.3-py3-none-any.whl", hash = "sha256:1318b2fb8e0313d39724b8ab0707af79d1e2c0e60710b608a00e70fe0f84ff5d", size = 16108, upload-time = "2023-07-12T17:47:49.682Z" }, ] +[[package]] +name = "coloredlogs" +version = "15.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "humanfriendly" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a7/06/3d6badcf13db419e25b07041d9c7b4a2c331d3f4e7134445ec5df57714cd/coloredlogs-15.0.1-py2.py3-none-any.whl", hash = "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", size = 46018, upload-time = "2021-06-11T10:22:42.561Z" }, +] + [[package]] name = "colorful" version = "0.5.7" @@ -780,6 +730,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bb/78/983efd23200921d9edb6bd40512e1aa04af553d7d5a171e50f9b2b45d109/coverage-7.10.4-py3-none-any.whl", hash = "sha256:065d75447228d05121e5c938ca8f0e91eed60a1eb2d1258d42d5084fecfc3302", size = 208365, upload-time = "2025-08-17T00:26:41.479Z" }, ] +[[package]] +name = "cppimport" +version = "22.8.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "mako" }, + { name = "pybind11" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/54/27/01d9078a77b9e31b79b9716e66ca4db74f4744c5232bcb3e8769395c4280/cppimport-22.8.2.tar.gz", hash = "sha256:bbb4957102db41bc99ad72c233bce92f9d1fd91be352fc07878c4361033a401f", size = 26635, upload-time = "2022-08-02T16:50:36.872Z" } + [[package]] name = "crc32c" version = "2.7.1" @@ -821,40 +782,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/19/c4/0b3eee04dac195f4730d102d7a9fbea894ae7a32ce075f84336df96a385d/crc32c-2.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:eee2a43b663feb6c79a6c1c6e5eae339c2b72cfac31ee54ec0209fa736cf7ee5", size = 39781, upload-time = "2024-09-24T06:19:08.182Z" }, ] -[[package]] -name = "cryptography" -version = "42.0.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/93/a7/1498799a2ea06148463a9a2c10ab2f6a921a74fb19e231b27dc412a748e2/cryptography-42.0.8.tar.gz", hash = "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2", size = 671250, upload-time = "2024-06-04T19:55:08.609Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/8b/1b929ba8139430e09e140e6939c2b29c18df1f2fc2149e41bdbdcdaf5d1f/cryptography-42.0.8-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e", size = 5899961, upload-time = "2024-06-04T19:53:57.933Z" }, - { url = "https://files.pythonhosted.org/packages/fa/5d/31d833daa800e4fab33209843095df7adb4a78ea536929145534cbc15026/cryptography-42.0.8-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d", size = 3114353, upload-time = "2024-06-04T19:54:12.171Z" }, - { url = "https://files.pythonhosted.org/packages/5d/32/f6326c70a9f0f258a201d3b2632bca586ea24d214cec3cf36e374040e273/cryptography-42.0.8-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902", size = 3647773, upload-time = "2024-06-04T19:54:07.051Z" }, - { url = "https://files.pythonhosted.org/packages/35/66/2d87e9ca95c82c7ee5f2c09716fc4c4242c1ae6647b9bd27e55e920e9f10/cryptography-42.0.8-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801", size = 3839763, upload-time = "2024-06-04T19:54:30.383Z" }, - { url = "https://files.pythonhosted.org/packages/c2/de/8083fa2e68d403553a01a9323f4f8b9d7ffed09928ba25635c29fb28c1e7/cryptography-42.0.8-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949", size = 3632661, upload-time = "2024-06-04T19:54:32.955Z" }, - { url = "https://files.pythonhosted.org/packages/07/40/d6f6819c62e808ea74639c3c640f7edd636b86cce62cb14943996a15df92/cryptography-42.0.8-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9", size = 3851536, upload-time = "2024-06-04T19:53:53.131Z" }, - { url = "https://files.pythonhosted.org/packages/5c/46/de71d48abf2b6d3c808f4fbb0f4dc44a4e72786be23df0541aa2a3f6fd7e/cryptography-42.0.8-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583", size = 3754209, upload-time = "2024-06-04T19:54:55.259Z" }, - { url = "https://files.pythonhosted.org/packages/25/c9/86f04e150c5d5d5e4a731a2c1e0e43da84d901f388e3fea3d5de98d689a7/cryptography-42.0.8-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7", size = 3923551, upload-time = "2024-06-04T19:54:16.46Z" }, - { url = "https://files.pythonhosted.org/packages/53/c2/903014dafb7271fb148887d4355b2e90319cad6e810663be622b0c933fc9/cryptography-42.0.8-cp37-abi3-musllinux_1_2_aarch64.whl", hash = 
"sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b", size = 3739265, upload-time = "2024-06-04T19:54:23.194Z" }, - { url = "https://files.pythonhosted.org/packages/95/26/82d704d988a193cbdc69ac3b41c687c36eaed1642cce52530ad810c35645/cryptography-42.0.8-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7", size = 3937371, upload-time = "2024-06-04T19:55:04.303Z" }, - { url = "https://files.pythonhosted.org/packages/cf/71/4e0d05c9acd638a225f57fb6162aa3d03613c11b76893c23ea4675bb28c5/cryptography-42.0.8-cp37-abi3-win32.whl", hash = "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2", size = 2438849, upload-time = "2024-06-04T19:54:27.39Z" }, - { url = "https://files.pythonhosted.org/packages/06/0f/78da3cad74f2ba6c45321dc90394d70420ea846730dc042ef527f5a224b5/cryptography-42.0.8-cp37-abi3-win_amd64.whl", hash = "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba", size = 2889090, upload-time = "2024-06-04T19:54:14.245Z" }, - { url = "https://files.pythonhosted.org/packages/60/12/f064af29190cdb1d38fe07f3db6126091639e1dece7ec77c4ff037d49193/cryptography-42.0.8-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28", size = 5901232, upload-time = "2024-06-04T19:54:52.722Z" }, - { url = "https://files.pythonhosted.org/packages/43/c2/4a3eef67e009a522711ebd8ac89424c3a7fe591ece7035d964419ad52a1d/cryptography-42.0.8-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e", size = 3648711, upload-time = "2024-06-04T19:54:44.323Z" }, - { url = "https://files.pythonhosted.org/packages/49/1c/9f6d13cc8041c05eebff1154e4e71bedd1db8e174fff999054435994187a/cryptography-42.0.8-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70", 
size = 3841968, upload-time = "2024-06-04T19:54:57.911Z" }, - { url = "https://files.pythonhosted.org/packages/5f/f9/c3d4f19b82bdb25a3d857fe96e7e571c981810e47e3f299cc13ac429066a/cryptography-42.0.8-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c", size = 3633032, upload-time = "2024-06-04T19:54:48.518Z" }, - { url = "https://files.pythonhosted.org/packages/fa/e2/b7e6e8c261536c489d9cf908769880d94bd5d9a187e166b0dc838d2e6a56/cryptography-42.0.8-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7", size = 3852478, upload-time = "2024-06-04T19:54:50.599Z" }, - { url = "https://files.pythonhosted.org/packages/a2/68/e16751f6b859bc120f53fddbf3ebada5c34f0e9689d8af32884d8b2e4b4c/cryptography-42.0.8-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e", size = 3754102, upload-time = "2024-06-04T19:54:46.231Z" }, - { url = "https://files.pythonhosted.org/packages/0f/38/85c74d0ac4c540780e072b1e6f148ecb718418c1062edcb20d22f3ec5bbb/cryptography-42.0.8-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961", size = 3925042, upload-time = "2024-06-04T19:54:34.767Z" }, - { url = "https://files.pythonhosted.org/packages/89/f4/a8b982e88eb5350407ebdbf4717b55043271d878705329e107f4783555f2/cryptography-42.0.8-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1", size = 3738833, upload-time = "2024-06-04T19:54:05.231Z" }, - { url = "https://files.pythonhosted.org/packages/fd/2b/be327b580645927bb1a1f32d5a175b897a9b956bc085b095e15c40bac9ed/cryptography-42.0.8-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14", size = 3938751, upload-time = "2024-06-04T19:54:37.837Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/d5/c6a78ffccdbe4516711ebaa9ed2c7eb6ac5dfa3dc920f2c7e920af2418b0/cryptography-42.0.8-cp39-abi3-win32.whl", hash = "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c", size = 2439281, upload-time = "2024-06-04T19:53:55.903Z" }, - { url = "https://files.pythonhosted.org/packages/a2/7b/b0d330852dd5953daee6b15f742f15d9f18e9c0154eb4cfcc8718f0436da/cryptography-42.0.8-cp39-abi3-win_amd64.whl", hash = "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a", size = 2886038, upload-time = "2024-06-04T19:54:18.707Z" }, -] - [[package]] name = "cuda-bindings" version = "13.0.1" @@ -982,15 +909,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/57/ecc9ae29fa5b2d90107cd1d9bf8ed19aacb74b2264d986ae9d44fe9bdf87/debugpy-1.8.16-py2.py3-none-any.whl", hash = "sha256:19c9521962475b87da6f673514f7fd610328757ec993bf7ec0d8c96f9a325f9e", size = 5287700, upload-time = "2025-08-06T18:00:42.333Z" }, ] -[[package]] -name = "decorator" -version = "5.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/fa/6d96a0978d19e17b68d634497769987b16c8f4cd0a7a05048bec693caa6b/decorator-5.2.1.tar.gz", hash = "sha256:65f266143752f734b0a7cc83c46f4618af75b8c5911b00ccb61d0ac9b6da0360", size = 56711, upload-time = "2025-02-24T04:41:34.073Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, -] - [[package]] name = "decord" version = "0.6.0" @@ -1009,15 +927,12 @@ version = "2.0.0+7b6b556" source = { git = "https://github.com/deepseek-ai/DeepGEMM.git?rev=7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c#7b6b5563b9d4c1ae07ffbce7f78ad3ac9204827c" } [[package]] -name = "deprecated" -version = "1.2.18" +name = 
"defusedxml" +version = "0.7.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wrapt" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/97/06afe62762c9a8a86af0cfb7bfdab22a43ad17138b07af5b1a58442690a2/deprecated-1.2.18.tar.gz", hash = "sha256:422b6f6d859da6f2ef57857761bfb392480502a64c3028ca9bbe86085d72115d", size = 2928744, upload-time = "2025-01-27T10:46:25.7Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998, upload-time = "2025-01-27T10:46:09.186Z" }, + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, ] [[package]] @@ -1098,15 +1013,6 @@ version = "0.6.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/a2/55/8f8cab2afd404cf578136ef2cc5dfb50baa1761b68c9da1fb1e4eed343c9/docopt-0.6.2.tar.gz", hash = "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491", size = 25901, upload-time = "2014-06-16T11:18:57.406Z" } -[[package]] -name = "docstring-parser" -version = "0.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/9d/c3b43da9515bd270df0f80548d9944e389870713cc1fe2b8fb35fe2bcefd/docstring_parser-0.17.0.tar.gz", hash = 
"sha256:583de4a309722b3315439bb31d64ba3eebada841f2e2cee23b99df001434c912", size = 27442, upload-time = "2025-07-21T07:35:01.868Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/55/e2/2537ebcff11c1ee1ff17d8d0b6f4db75873e3b0fb32c2d4a2ee31ecb310a/docstring_parser-0.17.0-py3-none-any.whl", hash = "sha256:cf2569abd23dce8099b300f9b4fa8191e9582dda731fd533daf54c4551658708", size = 36896, upload-time = "2025-07-21T07:35:00.684Z" }, -] - [[package]] name = "docutils" version = "0.21.2" @@ -1150,21 +1056,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/ee/bf0adb559ad3c786f12bcbc9296b3f5675f529199bef03e2df281fa1fadb/email_validator-2.2.0-py3-none-any.whl", hash = "sha256:561977c2d73ce3611850a06fa56b414621e0c8faa9d66f2611407d87465da631", size = 33521, upload-time = "2024-06-20T11:30:28.248Z" }, ] -[[package]] -name = "fabric" -version = "3.2.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "decorator" }, - { name = "deprecated" }, - { name = "invoke" }, - { name = "paramiko" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0d/3f/337f278b70ba339c618a490f6b8033b7006c583bd197a897f12fbc468c51/fabric-3.2.2.tar.gz", hash = "sha256:8783ca42e3b0076f08b26901aac6b9d9b1f19c410074e7accfab902c184ff4a3", size = 183215, upload-time = "2023-08-31T01:42:05.55Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/1f/e99e23ee01847147fa194e8d41cfcf2535a2dbfcb51414c541cadb15c5d7/fabric-3.2.2-py3-none-any.whl", hash = "sha256:91c47c0be68b14936c88b34da8a1f55e5710fd28397dac5d4ff2e21558113a6f", size = 59417, upload-time = "2023-08-31T01:42:03.917Z" }, -] - [[package]] name = "fastapi" version = "0.116.1" @@ -1245,21 +1136,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/28/a3/2ad0a0a69662fd4cf556ab8074f0de978ee9b56bff6ddb4e656df4aa9e8e/fastrlock-0.8.3-cp313-cp313-win_amd64.whl", hash = "sha256:8d1d6a28291b4ace2a66bd7b49a9ed9c762467617febdd9ab356b867ed901af8", size = 30472, upload-time = 
"2024-12-17T11:02:37.983Z" }, ] -[[package]] -name = "fiddle" -version = "0.3.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "absl-py" }, - { name = "graphviz" }, - { name = "libcst" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/73/36/7a4fac76351619b36bbc7937abf59f7b601326dc4efc253b3c16819f782a/fiddle-0.3.0.tar.gz", hash = "sha256:5d083d3299a479868345513385a6c5546141bd92086c15d3dcbf8008a90075d3", size = 277884, upload-time = "2024-04-09T17:23:58.974Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/98/a38e949a91ff9e15874487fd8329ff53c25f3413c0cfc809eb6ff7eb7fa1/fiddle-0.3.0-py3-none-any.whl", hash = "sha256:f4824541c103a94a2f33f6c93eeddf6007c3a7300440087a95907f3e74362e61", size = 419830, upload-time = "2024-04-09T17:23:56.7Z" }, -] - [[package]] name = "filelock" version = "3.19.1" @@ -1314,6 +1190,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/7b/f0b45f0df7d2978e5ae51804bb5939b7897b2ace24306009da0cc34d8d1f/Flask_RESTful-0.3.10-py2.py3-none-any.whl", hash = "sha256:1cf93c535172f112e080b0d4503a8d15f93a48c88bdd36dd87269bdaf405051b", size = 26217, upload-time = "2023-05-21T03:58:54.004Z" }, ] +[[package]] +name = "flatbuffers" +version = "25.2.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/30/eb5dce7994fc71a2f685d98ec33cc660c0a5887db5610137e60d8cbc4489/flatbuffers-25.2.10.tar.gz", hash = "sha256:97e451377a41262f8d9bd4295cc836133415cc03d8cb966410a4af92eb00d26e", size = 22170, upload-time = "2025-02-11T04:26:46.257Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b8/25/155f9f080d5e4bc0082edfda032ea2bc2b8fab3f4d25d46c1e9dd22a1a89/flatbuffers-25.2.10-py2.py3-none-any.whl", hash = "sha256:ebba5f4d5ea615af3f7fd70fc310636fbb2bbd1f566ac0a23d98dd412de50051", size = 30953, upload-time = "2025-02-11T04:26:44.484Z" }, +] + [[package]] name = "fonttools" version = 
"4.59.1" @@ -1545,15 +1430,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/74/16/a4cf06adbc711bd364a73ce043b0b08d8fa5aae3df11b6ee4248bcdad2e0/graphql_relay-3.2.0-py3-none-any.whl", hash = "sha256:c9b22bd28b170ba1fe674c74384a8ff30a76c8e26f88ac3aa1584dd3179953e5", size = 16940, upload-time = "2022-04-16T11:03:43.895Z" }, ] -[[package]] -name = "graphviz" -version = "0.21" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f8/b3/3ac91e9be6b761a4b30d66ff165e54439dcd48b83f4e20d644867215f6ca/graphviz-0.21.tar.gz", hash = "sha256:20743e7183be82aaaa8ad6c93f8893c923bd6658a04c32ee115edb3c8a835f78", size = 200434, upload-time = "2025-06-15T09:35:05.824Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/4c/e0ce1ef95d4000ebc1c11801f9b944fa5910ecc15b5e351865763d8657f8/graphviz-0.21-py3-none-any.whl", hash = "sha256:54f33de9f4f911d7e84e4191749cac8cc5653f815b06738c54db9a15ab8b1e42", size = 47300, upload-time = "2025-06-15T09:35:04.433Z" }, -] - [[package]] name = "greenlet" version = "3.2.4" @@ -1668,7 +1544,7 @@ name = "gunicorn" version = "23.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "packaging" }, + { name = "packaging", marker = "sys_platform != 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/34/72/9614c465dc206155d93eff0ca20d42e1e35afc533971379482de953521a4/gunicorn-23.0.0.tar.gz", hash = "sha256:f014447a0101dc57e294f6c18ca6b40227a4c90e9bdb586042628030cba004ec", size = 375031, upload-time = "2024-08-10T20:25:27.378Z" } wheels = [ @@ -1684,27 +1560,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] -[[package]] -name = "h5py" -version = "3.14.0" -source = { registry = 
"https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5d/57/dfb3c5c3f1bf5f5ef2e59a22dec4ff1f3d7408b55bfcefcfb0ea69ef21c6/h5py-3.14.0.tar.gz", hash = "sha256:2372116b2e0d5d3e5e705b7f663f7c8d96fa79a4052d250484ef91d24d6a08f4", size = 424323, upload-time = "2025-06-06T14:06:15.01Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/3e/77/8f651053c1843391e38a189ccf50df7e261ef8cd8bfd8baba0cbe694f7c3/h5py-3.14.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e0045115d83272090b0717c555a31398c2c089b87d212ceba800d3dc5d952e23", size = 3312740, upload-time = "2025-06-06T14:05:01.193Z" }, - { url = "https://files.pythonhosted.org/packages/ff/10/20436a6cf419b31124e59fefc78d74cb061ccb22213226a583928a65d715/h5py-3.14.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6da62509b7e1d71a7d110478aa25d245dd32c8d9a1daee9d2a42dba8717b047a", size = 2829207, upload-time = "2025-06-06T14:05:05.061Z" }, - { url = "https://files.pythonhosted.org/packages/3f/19/c8bfe8543bfdd7ccfafd46d8cfd96fce53d6c33e9c7921f375530ee1d39a/h5py-3.14.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:554ef0ced3571366d4d383427c00c966c360e178b5fb5ee5bb31a435c424db0c", size = 4708455, upload-time = "2025-06-06T14:05:11.528Z" }, - { url = "https://files.pythonhosted.org/packages/86/f9/f00de11c82c88bfc1ef22633557bfba9e271e0cb3189ad704183fc4a2644/h5py-3.14.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0cbd41f4e3761f150aa5b662df991868ca533872c95467216f2bec5fcad84882", size = 4929422, upload-time = "2025-06-06T14:05:18.399Z" }, - { url = "https://files.pythonhosted.org/packages/7a/6d/6426d5d456f593c94b96fa942a9b3988ce4d65ebaf57d7273e452a7222e8/h5py-3.14.0-cp312-cp312-win_amd64.whl", hash = "sha256:bf4897d67e613ecf5bdfbdab39a1158a64df105827da70ea1d90243d796d367f", size = 2862845, upload-time = "2025-06-06T14:05:23.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/6c/c2/7efe82d09ca10afd77cd7c286e42342d520c049a8c43650194928bcc635c/h5py-3.14.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:aa4b7bbce683379b7bf80aaba68e17e23396100336a8d500206520052be2f812", size = 3289245, upload-time = "2025-06-06T14:05:28.24Z" }, - { url = "https://files.pythonhosted.org/packages/4f/31/f570fab1239b0d9441024b92b6ad03bb414ffa69101a985e4c83d37608bd/h5py-3.14.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9603a501a04fcd0ba28dd8f0995303d26a77a980a1f9474b3417543d4c6174", size = 2807335, upload-time = "2025-06-06T14:05:31.997Z" }, - { url = "https://files.pythonhosted.org/packages/0d/ce/3a21d87896bc7e3e9255e0ad5583ae31ae9e6b4b00e0bcb2a67e2b6acdbc/h5py-3.14.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8cbaf6910fa3983c46172666b0b8da7b7bd90d764399ca983236f2400436eeb", size = 4700675, upload-time = "2025-06-06T14:05:37.38Z" }, - { url = "https://files.pythonhosted.org/packages/e7/ec/86f59025306dcc6deee5fda54d980d077075b8d9889aac80f158bd585f1b/h5py-3.14.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d90e6445ab7c146d7f7981b11895d70bc1dd91278a4f9f9028bc0c95e4a53f13", size = 4921632, upload-time = "2025-06-06T14:05:43.464Z" }, - { url = "https://files.pythonhosted.org/packages/3f/6d/0084ed0b78d4fd3e7530c32491f2884140d9b06365dac8a08de726421d4a/h5py-3.14.0-cp313-cp313-win_amd64.whl", hash = "sha256:ae18e3de237a7a830adb76aaa68ad438d85fe6e19e0d99944a3ce46b772c69b3", size = 2852929, upload-time = "2025-06-06T14:05:47.659Z" }, -] - [[package]] name = "hatchling" version = "1.27.0" @@ -1809,6 +1664,18 @@ hf-xet = [ { name = "hf-xet" }, ] +[[package]] +name = "humanfriendly" +version = "10.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyreadline3", marker = "sys_platform == 'win32'" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/cc/3f/2c29224acb2e2df4d2046e4c73ee2662023c58ff5b113c4c1adac0886c43/humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc", size = 360702, upload-time = "2021-09-17T21:40:43.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794, upload-time = "2021-09-17T21:40:39.897Z" }, +] + [[package]] name = "hydra-core" version = "1.3.2" @@ -1841,47 +1708,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, ] -[[package]] -name = "ijson" -version = "3.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a3/4f/1cfeada63f5fce87536651268ddf5cca79b8b4bbb457aee4e45777964a0a/ijson-3.4.0.tar.gz", hash = "sha256:5f74dcbad9d592c428d3ca3957f7115a42689ee7ee941458860900236ae9bb13", size = 65782, upload-time = "2025-05-08T02:37:20.135Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f8/ec/317ee5b2d13e50448833ead3aa906659a32b376191f6abc2a7c6112d2b27/ijson-3.4.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:956b148f88259a80a9027ffbe2d91705fae0c004fbfba3e5a24028fbe72311a9", size = 87212, upload-time = "2025-05-08T02:35:51.835Z" }, - { url = "https://files.pythonhosted.org/packages/f8/43/b06c96ced30cacecc5d518f89b0fd1c98c294a30ff88848b70ed7b7f72a1/ijson-3.4.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:06b89960f5c721106394c7fba5760b3f67c515b8eb7d80f612388f5eca2f4621", size = 59175, upload-time = "2025-05-08T02:35:52.988Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/df/b4aeafb7ecde463130840ee9be36130823ec94a00525049bf700883378b8/ijson-3.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9a0bb591cf250dd7e9dfab69d634745a7f3272d31cfe879f9156e0a081fd97ee", size = 59011, upload-time = "2025-05-08T02:35:54.394Z" }, - { url = "https://files.pythonhosted.org/packages/e3/7c/a80b8e361641609507f62022089626d4b8067f0826f51e1c09e4ba86eba8/ijson-3.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72e92de999977f4c6b660ffcf2b8d59604ccd531edcbfde05b642baf283e0de8", size = 146094, upload-time = "2025-05-08T02:35:55.601Z" }, - { url = "https://files.pythonhosted.org/packages/01/44/fa416347b9a802e3646c6ff377fc3278bd7d6106e17beb339514b6a3184e/ijson-3.4.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e9602157a5b869d44b6896e64f502c712a312fcde044c2e586fccb85d3e316e", size = 137903, upload-time = "2025-05-08T02:35:56.814Z" }, - { url = "https://files.pythonhosted.org/packages/24/c6/41a9ad4d42df50ff6e70fdce79b034f09b914802737ebbdc141153d8d791/ijson-3.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e83660edb931a425b7ff662eb49db1f10d30ca6d4d350e5630edbed098bc01", size = 148339, upload-time = "2025-05-08T02:35:58.595Z" }, - { url = "https://files.pythonhosted.org/packages/5f/6f/7d01efda415b8502dce67e067ed9e8a124f53e763002c02207e542e1a2f1/ijson-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:49bf8eac1c7b7913073865a859c215488461f7591b4fa6a33c14b51cb73659d0", size = 149383, upload-time = "2025-05-08T02:36:00.197Z" }, - { url = "https://files.pythonhosted.org/packages/95/6c/0d67024b9ecb57916c5e5ab0350251c9fe2f86dc9c8ca2b605c194bdad6a/ijson-3.4.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:160b09273cb42019f1811469508b0a057d19f26434d44752bde6f281da6d3f32", size = 141580, upload-time = "2025-05-08T02:36:01.998Z" }, - { url = 
"https://files.pythonhosted.org/packages/06/43/e10edcc1c6a3b619294de835e7678bfb3a1b8a75955f3689fd66a1e9e7b4/ijson-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2019ff4e6f354aa00c76c8591bd450899111c61f2354ad55cc127e2ce2492c44", size = 150280, upload-time = "2025-05-08T02:36:03.926Z" }, - { url = "https://files.pythonhosted.org/packages/07/84/1cbeee8e8190a1ebe6926569a92cf1fa80ddb380c129beb6f86559e1bb24/ijson-3.4.0-cp312-cp312-win32.whl", hash = "sha256:931c007bf6bb8330705429989b2deed6838c22b63358a330bf362b6e458ba0bf", size = 51512, upload-time = "2025-05-08T02:36:05.595Z" }, - { url = "https://files.pythonhosted.org/packages/66/13/530802bc391c95be6fe9f96e9aa427d94067e7c0b7da7a9092344dc44c4b/ijson-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:71523f2b64cb856a820223e94d23e88369f193017ecc789bb4de198cc9d349eb", size = 54081, upload-time = "2025-05-08T02:36:07.099Z" }, - { url = "https://files.pythonhosted.org/packages/77/b3/b1d2eb2745e5204ec7a25365a6deb7868576214feb5e109bce368fb692c9/ijson-3.4.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e8d96f88d75196a61c9d9443de2b72c2d4a7ba9456ff117b57ae3bba23a54256", size = 87216, upload-time = "2025-05-08T02:36:08.414Z" }, - { url = "https://files.pythonhosted.org/packages/b1/cd/cd6d340087617f8cc9bedbb21d974542fe2f160ed0126b8288d3499a469b/ijson-3.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c45906ce2c1d3b62f15645476fc3a6ca279549127f01662a39ca5ed334a00cf9", size = 59170, upload-time = "2025-05-08T02:36:09.604Z" }, - { url = "https://files.pythonhosted.org/packages/3e/4d/32d3a9903b488d3306e3c8288f6ee4217d2eea82728261db03a1045eb5d1/ijson-3.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4ab4bc2119b35c4363ea49f29563612237cae9413d2fbe54b223be098b97bc9e", size = 59013, upload-time = "2025-05-08T02:36:10.696Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/c8/db15465ab4b0b477cee5964c8bfc94bf8c45af8e27a23e1ad78d1926e587/ijson-3.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97b0a9b5a15e61dfb1f14921ea4e0dba39f3a650df6d8f444ddbc2b19b479ff1", size = 146564, upload-time = "2025-05-08T02:36:11.916Z" }, - { url = "https://files.pythonhosted.org/packages/c4/d8/0755545bc122473a9a434ab90e0f378780e603d75495b1ca3872de757873/ijson-3.4.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3047bb994dabedf11de11076ed1147a307924b6e5e2df6784fb2599c4ad8c60", size = 137917, upload-time = "2025-05-08T02:36:13.532Z" }, - { url = "https://files.pythonhosted.org/packages/d0/c6/aeb89c8939ebe3f534af26c8c88000c5e870dbb6ae33644c21a4531f87d2/ijson-3.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68c83161b052e9f5dc8191acbc862bb1e63f8a35344cb5cd0db1afd3afd487a6", size = 148897, upload-time = "2025-05-08T02:36:14.813Z" }, - { url = "https://files.pythonhosted.org/packages/be/0e/7ef6e9b372106f2682a4a32b3c65bf86bb471a1670e4dac242faee4a7d3f/ijson-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1eebd9b6c20eb1dffde0ae1f0fbb4aeacec2eb7b89adb5c7c0449fc9fd742760", size = 149711, upload-time = "2025-05-08T02:36:16.476Z" }, - { url = "https://files.pythonhosted.org/packages/d1/5d/9841c3ed75bcdabf19b3202de5f862a9c9c86ce5c7c9d95fa32347fdbf5f/ijson-3.4.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:13fb6d5c35192c541421f3ee81239d91fc15a8d8f26c869250f941f4b346a86c", size = 141691, upload-time = "2025-05-08T02:36:18.044Z" }, - { url = "https://files.pythonhosted.org/packages/d5/d2/ce74e17218dba292e9be10a44ed0c75439f7958cdd263adb0b5b92d012d5/ijson-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:28b7196ff7b37c4897c547a28fa4876919696739fc91c1f347651c9736877c69", size = 150738, upload-time = "2025-05-08T02:36:19.483Z" }, - { url = 
"https://files.pythonhosted.org/packages/4e/43/dcc480f94453b1075c9911d4755b823f3ace275761bb37b40139f22109ca/ijson-3.4.0-cp313-cp313-win32.whl", hash = "sha256:3c2691d2da42629522140f77b99587d6f5010440d58d36616f33bc7bdc830cc3", size = 51512, upload-time = "2025-05-08T02:36:20.99Z" }, - { url = "https://files.pythonhosted.org/packages/35/dd/d8c5f15efd85ba51e6e11451ebe23d779361a9ec0d192064c2a8c3cdfcb8/ijson-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:c4554718c275a044c47eb3874f78f2c939f300215d9031e785a6711cc51b83fc", size = 54074, upload-time = "2025-05-08T02:36:22.075Z" }, - { url = "https://files.pythonhosted.org/packages/79/73/24ad8cd106203419c4d22bed627e02e281d66b83e91bc206a371893d0486/ijson-3.4.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:915a65e3f3c0eee2ea937bc62aaedb6c14cc1e8f0bb9f3f4fb5a9e2bbfa4b480", size = 91694, upload-time = "2025-05-08T02:36:23.289Z" }, - { url = "https://files.pythonhosted.org/packages/17/2d/f7f680984bcb7324a46a4c2df3bd73cf70faef0acfeb85a3f811abdfd590/ijson-3.4.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:afbe9748707684b6c5adc295c4fdcf27765b300aec4d484e14a13dca4e5c0afa", size = 61390, upload-time = "2025-05-08T02:36:24.42Z" }, - { url = "https://files.pythonhosted.org/packages/09/a1/f3ca7bab86f95bdb82494739e71d271410dfefce4590785d511669127145/ijson-3.4.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d823f8f321b4d8d5fa020d0a84f089fec5d52b7c0762430476d9f8bf95bbc1a9", size = 61140, upload-time = "2025-05-08T02:36:26.708Z" }, - { url = "https://files.pythonhosted.org/packages/51/79/dd340df3d4fc7771c95df29997956b92ed0570fe7b616d1792fea9ad93f2/ijson-3.4.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8a0a2c54f3becf76881188beefd98b484b1d3bd005769a740d5b433b089fa23", size = 214739, upload-time = "2025-05-08T02:36:27.973Z" }, - { url = 
"https://files.pythonhosted.org/packages/59/f0/85380b7f51d1f5fb7065d76a7b623e02feca920cc678d329b2eccc0011e0/ijson-3.4.0-cp313-cp313t-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ced19a83ab09afa16257a0b15bc1aa888dbc555cb754be09d375c7f8d41051f2", size = 198338, upload-time = "2025-05-08T02:36:29.496Z" }, - { url = "https://files.pythonhosted.org/packages/a5/cd/313264cf2ec42e0f01d198c49deb7b6fadeb793b3685e20e738eb6b3fa13/ijson-3.4.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8100f9885eff1f38d35cef80ef759a1bbf5fc946349afa681bd7d0e681b7f1a0", size = 207515, upload-time = "2025-05-08T02:36:30.981Z" }, - { url = "https://files.pythonhosted.org/packages/12/94/bf14457aa87ea32641f2db577c9188ef4e4ae373478afef422b31fc7f309/ijson-3.4.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:d7bcc3f7f21b0f703031ecd15209b1284ea51b2a329d66074b5261de3916c1eb", size = 210081, upload-time = "2025-05-08T02:36:32.403Z" }, - { url = "https://files.pythonhosted.org/packages/7d/b4/eaee39e290e40e52d665db9bd1492cfdce86bd1e47948e0440db209c6023/ijson-3.4.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2dcb190227b09dd171bdcbfe4720fddd574933c66314818dfb3960c8a6246a77", size = 199253, upload-time = "2025-05-08T02:36:33.861Z" }, - { url = "https://files.pythonhosted.org/packages/c5/9c/e09c7b9ac720a703ab115b221b819f149ed54c974edfff623c1e925e57da/ijson-3.4.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:eda4cfb1d49c6073a901735aaa62e39cb7ab47f3ad7bb184862562f776f1fa8a", size = 203816, upload-time = "2025-05-08T02:36:35.348Z" }, - { url = "https://files.pythonhosted.org/packages/7c/14/acd304f412e32d16a2c12182b9d78206bb0ae35354d35664f45db05c1b3b/ijson-3.4.0-cp313-cp313t-win32.whl", hash = "sha256:0772638efa1f3b72b51736833404f1cbd2f5beeb9c1a3d392e7d385b9160cba7", size = 53760, upload-time = "2025-05-08T02:36:36.608Z" }, - { url = 
"https://files.pythonhosted.org/packages/2f/24/93dd0a467191590a5ed1fc2b35842bca9d09900d001e00b0b497c0208ef6/ijson-3.4.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3d8a0d67f36e4fb97c61a724456ef0791504b16ce6f74917a31c2e92309bbeb9", size = 56948, upload-time = "2025-05-08T02:36:37.849Z" }, -] - [[package]] name = "imagesize" version = "1.4.1" @@ -1926,19 +1752,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] -[[package]] -name = "inquirerpy" -version = "0.3.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pfzy" }, - { name = "prompt-toolkit" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/64/73/7570847b9da026e07053da3bbe2ac7ea6cde6bb2cbd3c7a5a950fa0ae40b/InquirerPy-0.3.4.tar.gz", hash = "sha256:89d2ada0111f337483cb41ae31073108b2ec1e618a49d7110b0d7ade89fc197e", size = 44431, upload-time = "2022-06-27T23:11:20.598Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/ff/3b59672c47c6284e8005b42e84ceba13864aa0f39f067c973d1af02f5d91/InquirerPy-0.3.4-py3-none-any.whl", hash = "sha256:c65fdfbac1fa00e3ee4fb10679f4d3ed7a012abf4833910e63c295827fe2a7d4", size = 67677, upload-time = "2022-06-27T23:11:17.723Z" }, -] - [[package]] name = "interegular" version = "0.3.3" @@ -1948,15 +1761,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/01/72d6472f80651673716d1deda2a5bbb633e563ecf94f4479da5519d69d25/interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c", size = 23635, upload-time = "2024-01-06T23:01:20.829Z" }, ] -[[package]] -name = "invoke" -version = "2.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/f9/42/127e6d792884ab860defc3f4d80a8f9812e48ace584ffc5a346de58cdc6c/invoke-2.2.0.tar.gz", hash = "sha256:ee6cbb101af1a859c7fe84f2a264c059020b0cb7fe3535f9424300ab568f6bd5", size = 299835, upload-time = "2023-07-12T18:05:17.998Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/66/7f8c48009c72d73bc6bbe6eb87ac838d6a526146f7dab14af671121eb379/invoke-2.2.0-py3-none-any.whl", hash = "sha256:6ea924cc53d4f78e3d98bc436b08069a03077e6f85ad1ddaa8a116d7dad15820", size = 160274, upload-time = "2023-07-12T18:05:16.294Z" }, -] - [[package]] name = "itsdangerous" version = "2.2.0" @@ -2156,48 +1960,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/60/dfbbf40e3a371388c0e03ff65b01319b7d4023e883df6d7261125772ffdc/latex2sympy2_extended-1.10.2-py3-none-any.whl", hash = "sha256:f910442c5b02a466c1046f47d05cc5285181068b882399281f30102715337fb7", size = 207855, upload-time = "2025-07-02T15:26:04.88Z" }, ] -[[package]] -name = "libcst" -version = "1.8.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml", marker = "python_full_version < '3.13'" }, - { name = "pyyaml-ft", marker = "python_full_version >= '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/89/aa/b52d195b167958fe1bd106a260f64cc80ec384f6ac2a9cda874d8803df06/libcst-1.8.2.tar.gz", hash = "sha256:66e82cedba95a6176194a817be4232c720312f8be6d2c8f3847f3317d95a0c7f", size = 881534, upload-time = "2025-06-13T20:56:37.915Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/31/2d/8726bf8ea8252e8fd1e48980753eef5449622c5f6cf731102bc43dcdc2c6/libcst-1.8.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2e8c1dfa854e700fcf6cd79b2796aa37d55697a74646daf5ea47c7c764bac31c", size = 2185942, upload-time = "2025-06-13T20:55:26.105Z" }, - { url = "https://files.pythonhosted.org/packages/99/b3/565d24db8daed66eae7653c1fc1bc97793d49d5d3bcef530450ee8da882c/libcst-1.8.2-cp312-cp312-macosx_11_0_arm64.whl", 
hash = "sha256:2b5c57a3c1976c365678eb0730bcb140d40510990cb77df9a91bb5c41d587ba6", size = 2072622, upload-time = "2025-06-13T20:55:27.548Z" }, - { url = "https://files.pythonhosted.org/packages/8c/d6/5a433e8a58eeb5c5d46635cfe958d0605f598d87977d4560484e3662d438/libcst-1.8.2-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:0f23409add2aaebbb6d8e881babab43c2d979f051b8bd8aed5fe779ea180a4e8", size = 2402738, upload-time = "2025-06-13T20:55:29.539Z" }, - { url = "https://files.pythonhosted.org/packages/85/e4/0dd752c1880b570118fa91ac127589e6cf577ddcb2eef1aaf8b81ecc3f79/libcst-1.8.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b88e9104c456590ad0ef0e82851d4fc03e9aa9d621fa8fdd4cd0907152a825ae", size = 2219932, upload-time = "2025-06-13T20:55:31.17Z" }, - { url = "https://files.pythonhosted.org/packages/42/bc/fceae243c6a329477ac6d4edb887bcaa2ae7a3686158d8d9b9abb3089c37/libcst-1.8.2-cp312-cp312-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5ba3ea570c8fb6fc44f71aa329edc7c668e2909311913123d0d7ab8c65fc357", size = 2191891, upload-time = "2025-06-13T20:55:33.066Z" }, - { url = "https://files.pythonhosted.org/packages/7d/7d/eb341bdc11f1147e7edeccffd0f2f785eff014e72134f5e46067472012b0/libcst-1.8.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:460fcf3562f078781e1504983cb11909eb27a1d46eaa99e65c4b0fafdc298298", size = 2311927, upload-time = "2025-06-13T20:55:34.614Z" }, - { url = "https://files.pythonhosted.org/packages/d8/19/78bfc7aa5a542574d2ab0768210d084901dec5fc373103ca119905408cf2/libcst-1.8.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c1381ddbd1066d543e05d580c15beacf671e1469a0b2adb6dba58fec311f4eed", size = 2281098, upload-time = "2025-06-13T20:55:36.089Z" }, - { url = "https://files.pythonhosted.org/packages/83/37/a41788a72dc06ed3566606f7cf50349c9918cee846eeae45d1bac03d54c2/libcst-1.8.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:a70e40ce7600e1b32e293bb9157e9de3b69170e2318ccb219102f1abb826c94a", size = 2387649, upload-time = "2025-06-13T20:55:37.797Z" }, - { url = "https://files.pythonhosted.org/packages/bb/df/7a49576c9fd55cdfd8bcfb725273aa4ee7dc41e87609f3451a4901d68057/libcst-1.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:3ece08ba778b6eeea74d9c705e9af2d1b4e915e9bc6de67ad173b962e575fcc0", size = 2094574, upload-time = "2025-06-13T20:55:39.833Z" }, - { url = "https://files.pythonhosted.org/packages/29/60/27381e194d2af08bfd0fed090c905b2732907b69da48d97d86c056d70790/libcst-1.8.2-cp312-cp312-win_arm64.whl", hash = "sha256:5efd1bf6ee5840d1b0b82ec8e0b9c64f182fa5a7c8aad680fbd918c4fa3826e0", size = 1984568, upload-time = "2025-06-13T20:55:41.511Z" }, - { url = "https://files.pythonhosted.org/packages/11/9c/e3d4c7f1eb5c23907f905f84a4da271b60cd15b746ac794d42ea18bb105e/libcst-1.8.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:08e9dca4ab6f8551794ce7ec146f86def6a82da41750cbed2c07551345fa10d3", size = 2185848, upload-time = "2025-06-13T20:55:43.653Z" }, - { url = "https://files.pythonhosted.org/packages/59/e0/635cbb205d42fd296c01ab5cd1ba485b0aee92bffe061de587890c81f1bf/libcst-1.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8310521f2ccb79b5c4345750d475b88afa37bad930ab5554735f85ad5e3add30", size = 2072510, upload-time = "2025-06-13T20:55:45.287Z" }, - { url = "https://files.pythonhosted.org/packages/fe/45/8911cfe9413fd690a024a1ff2c8975f060dd721160178679d3f6a21f939e/libcst-1.8.2-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:da2d8b008aff72acd5a4a588491abdda1b446f17508e700f26df9be80d8442ae", size = 2403226, upload-time = "2025-06-13T20:55:46.927Z" }, - { url = "https://files.pythonhosted.org/packages/38/83/819d2b1b1fd870ad34ce4f34ec68704ca69bf48ef2d7665483115f267ec4/libcst-1.8.2-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:be821d874ce8b26cbadd7277fa251a9b37f6d2326f8b5682b6fc8966b50a3a59", size = 2220669, 
upload-time = "2025-06-13T20:55:48.597Z" }, - { url = "https://files.pythonhosted.org/packages/d4/2f/2c4742bf834f88a9803095915c4f41cafefb7b04bde66ea86f74668b4b7b/libcst-1.8.2-cp313-cp313-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f74b0bc7378ad5afcf25ac9d0367b4dbba50f6f6468faa41f5dfddcf8bf9c0f8", size = 2191919, upload-time = "2025-06-13T20:55:50.092Z" }, - { url = "https://files.pythonhosted.org/packages/64/f4/107e13815f1ee5aad642d4eb4671c0273ee737f3832e3dbca9603b39f8d9/libcst-1.8.2-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:b68ea4a6018abfea1f68d50f74de7d399172684c264eb09809023e2c8696fc23", size = 2311965, upload-time = "2025-06-13T20:55:51.974Z" }, - { url = "https://files.pythonhosted.org/packages/03/63/2948b6e4be367ad375d273a8ad00df573029cffe5ac8f6c09398c250de5b/libcst-1.8.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2e264307ec49b2c72480422abafe80457f90b4e6e693b7ddf8a23d24b5c24001", size = 2281704, upload-time = "2025-06-13T20:55:54.036Z" }, - { url = "https://files.pythonhosted.org/packages/c8/d3/590cde9c8c386d5f4f05fdef3394c437ea51060478a5141ff4a1f289e747/libcst-1.8.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a5d5519962ce7c72d81888fb0c09e58e308ba4c376e76bcd853b48151063d6a8", size = 2387511, upload-time = "2025-06-13T20:55:55.538Z" }, - { url = "https://files.pythonhosted.org/packages/96/3d/ba5e36c663028043fc607dc33e5c390c7f73136fb15a890fb3710ee9d158/libcst-1.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:b62aa11d6b74ed5545e58ac613d3f63095e5fd0254b3e0d1168fda991b9a6b41", size = 2094526, upload-time = "2025-06-13T20:55:57.486Z" }, - { url = "https://files.pythonhosted.org/packages/a5/34/530ca3b972dddad562f266c81190bea29376f8ba70054ea7b45b114504cd/libcst-1.8.2-cp313-cp313-win_arm64.whl", hash = "sha256:9c2bd4ac288a9cdb7ffc3229a9ce8027a66a3fd3f2ab9e13da60f5fbfe91f3b2", size = 1984627, upload-time = "2025-06-13T20:55:59.017Z" }, - { url = 
"https://files.pythonhosted.org/packages/19/9f/491f7b8d9d93444cd9bf711156ee1f122c38d25b903599e363d669acc8ab/libcst-1.8.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:08a8c7d9922ca6eed24e2c13a3c552b3c186af8fc78e5d4820b58487d780ec19", size = 2175415, upload-time = "2025-06-13T20:56:01.157Z" }, - { url = "https://files.pythonhosted.org/packages/2e/fe/4d13437f453f92687246aa7c5138e102ee5186fe96609ee4c598bb9f9ecb/libcst-1.8.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bba7c2b5063e8ada5a5477f9fa0c01710645426b5a8628ec50d558542a0a292e", size = 2063719, upload-time = "2025-06-13T20:56:02.787Z" }, - { url = "https://files.pythonhosted.org/packages/94/59/758ae142c6607f275269021362b731e0f22ff5c9aa7cc67b0ed3a6bc930f/libcst-1.8.2-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:d97c9fe13aacfbefded6861f5200dcb8e837da7391a9bdeb44ccb133705990af", size = 2380624, upload-time = "2025-06-13T20:56:04.909Z" }, - { url = "https://files.pythonhosted.org/packages/ac/c5/31d214a0bcb3523243a9b5643b597ff653d6ec9e1f3326cfcc16bcbf185d/libcst-1.8.2-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:d2194ae959630aae4176a4b75bd320b3274c20bef2a5ca6b8d6fc96d3c608edf", size = 2208801, upload-time = "2025-06-13T20:56:06.983Z" }, - { url = "https://files.pythonhosted.org/packages/70/16/a53f852322b266c63b492836a5c4968f192ee70fb52795a79feb4924e9ed/libcst-1.8.2-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0be639f5b2e1999a4b4a82a0f4633969f97336f052d0c131627983589af52f56", size = 2179557, upload-time = "2025-06-13T20:56:09.09Z" }, - { url = "https://files.pythonhosted.org/packages/fa/49/12a5664c73107187ba3af14869d3878fca1fd4c37f6fbb9adb943cb7a791/libcst-1.8.2-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6753e50904e05c27915933da41518ecd7a8ca4dd3602112ba44920c6e353a455", size = 2302499, upload-time = "2025-06-13T20:56:10.751Z" }, - { url = 
"https://files.pythonhosted.org/packages/e9/46/2d62552a9346a040c045d6619b645d59bb707a586318121f099abd0cd5c4/libcst-1.8.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:706d07106af91c343150be86caeae1ea3851b74aa0730fcbbf8cd089e817f818", size = 2271070, upload-time = "2025-06-13T20:56:12.445Z" }, - { url = "https://files.pythonhosted.org/packages/af/67/b625fd6ae22575255aade0a24f45e1d430b7e7279729c9c51d4faac982d2/libcst-1.8.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd4310ea8ddc49cc8872e083737cf806299b17f93159a1f354d59aa08993e876", size = 2380767, upload-time = "2025-06-13T20:56:13.995Z" }, - { url = "https://files.pythonhosted.org/packages/e6/84/fb88f2ffdb045ff7323a6c05dd3d243a9eb3cb3517a6269dee43fbfb9990/libcst-1.8.2-cp313-cp313t-win_amd64.whl", hash = "sha256:51bbafdd847529e8a16d1965814ed17831af61452ee31943c414cb23451de926", size = 2083403, upload-time = "2025-06-13T20:56:15.959Z" }, - { url = "https://files.pythonhosted.org/packages/d3/8f/da755d6d517eb8ec9664afae967b00a9b8dd567bbbb350e261359c1b47fc/libcst-1.8.2-cp313-cp313t-win_arm64.whl", hash = "sha256:4f14f5045766646ed9e8826b959c6d07194788babed1e0ba08c94ea4f39517e3", size = 1974355, upload-time = "2025-06-13T20:56:18.064Z" }, -] - [[package]] name = "liger-kernel" version = "0.5.8" @@ -2211,40 +1973,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3d/40/75d82d90062b60e2aedd0b1741fe5b3dfbfd250aedd25933ef0b177b640e/liger_kernel-0.5.8-py3-none-any.whl", hash = "sha256:3102f99f89e9b9da249c83ea3f12b68680a8e8df0e477d4513e232da9af7d1a0", size = 150758, upload-time = "2025-04-12T16:44:30.791Z" }, ] -[[package]] -name = "lightning" -version = "2.5.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fsspec", extra = ["http"] }, - { name = "lightning-utilities" }, - { name = "packaging" }, - { name = "pytorch-lightning" }, - { name = "pyyaml" }, - { name = "torch" }, - { name = "torchmetrics" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] 
-sdist = { url = "https://files.pythonhosted.org/packages/01/80/dddb5a382aa0ff18045aee6491f81e40371102cb05da2ad5a8436a51c475/lightning-2.5.3.tar.gz", hash = "sha256:4ed3e12369a1e0f928beecf5c9f5efdabda60a9216057954851e2d89f1abecde", size = 636577, upload-time = "2025-08-13T20:29:32.361Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/00/6b/00e9c2b03a449c21d7a4d73a7104ac94f56c37a1e6eae77b1c702d8dddf0/lightning-2.5.3-py3-none-any.whl", hash = "sha256:c551111fda0db0bce267791f9a90cd4f9cf94bc327d36348af0ef79ec752d666", size = 824181, upload-time = "2025-08-13T20:29:30.244Z" }, -] - -[[package]] -name = "lightning-utilities" -version = "0.15.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "packaging" }, - { name = "setuptools" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b8/39/6fc58ca81492db047149b4b8fd385aa1bfb8c28cd7cacb0c7eb0c44d842f/lightning_utilities-0.15.2.tar.gz", hash = "sha256:cdf12f530214a63dacefd713f180d1ecf5d165338101617b4742e8f22c032e24", size = 31090, upload-time = "2025-08-06T13:57:39.242Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/de/73/3d757cb3fc16f0f9794dd289bcd0c4a031d9cf54d8137d6b984b2d02edf3/lightning_utilities-0.15.2-py3-none-any.whl", hash = "sha256:ad3ab1703775044bbf880dbf7ddaaac899396c96315f3aa1779cec9d618a9841", size = 29431, upload-time = "2025-08-06T13:57:38.046Z" }, -] - [[package]] name = "llguidance" version = "0.7.30" @@ -2503,6 +2231,62 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "megatron-bridge" +source = { editable = "3rdparty/Megatron-Bridge-workspace" } +dependencies = [ + { name = "accelerate" }, + { name = "datasets" }, + { name = 
"einops" }, + { name = "hydra-core" }, + { name = "megatron-core" }, + { name = "numpy" }, + { name = "nvidia-modelopt", extra = ["onnx"], marker = "sys_platform != 'darwin'" }, + { name = "nvidia-resiliency-ext", marker = "sys_platform != 'darwin'" }, + { name = "omegaconf" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "regex" }, + { name = "rich" }, + { name = "sentencepiece" }, + { name = "six" }, + { name = "tensorboard" }, + { name = "tiktoken" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformer-engine", extra = ["pytorch"], marker = "sys_platform != 'darwin'" }, + { name = "transformers" }, + { name = "typing-extensions" }, + { name = "wandb" }, +] + +[package.metadata] +requires-dist = [ + { name = "accelerate", specifier = ">=1.6.0" }, + { name = "datasets" }, + { name = "einops", specifier = ">=0.8.1" }, + { name = "hydra-core", specifier = ">1.3,<=1.3.2" }, + { name = "megatron-core", editable = "3rdparty/Megatron-LM-workspace" }, + { name = "numpy", specifier = "<2" }, + { name = "nvidia-modelopt", extras = ["onnx", "torch"], marker = "sys_platform != 'darwin'", specifier = ">=0.33.0a0,<0.34.0" }, + { name = "nvidia-resiliency-ext", marker = "sys_platform != 'darwin'", specifier = ">=0.4.0a0,<0.5.0" }, + { name = "omegaconf", specifier = ">=2.3.0" }, + { name = "packaging" }, + { name = "pyyaml", specifier = ">=6.0.2" }, + { name = "regex", specifier = ">=2024.11.6" }, + { name = "rich" }, + { name = "sentencepiece", specifier = ">=0.2.0" }, + { name = "six", specifier = ">=1.17.0" }, + { name = "tensorboard", specifier = ">=2.19.0" }, + { name = "tiktoken", specifier = ">=0.9.0" }, + { name = "torch", index = "https://download.pytorch.org/whl/cu128" }, + { name = "tqdm", specifier = ">=4.67.1" }, + { name = "transformer-engine", extras = ["pytorch"], marker = "sys_platform != 'darwin'", specifier = ">=2.5.0a0,<2.6.0" }, + { name = "transformers", specifier = ">=4.51.3" }, + { name = "typing-extensions" }, + { name = 
"wandb", specifier = ">=0.19.10" }, +] + [[package]] name = "megatron-core" source = { editable = "3rdparty/Megatron-LM-workspace" } @@ -2517,7 +2301,8 @@ dependencies = [ { name = "pytest-mock" }, { name = "pytest-random-order" }, { name = "sentencepiece" }, - { name = "tensorstore" }, + { name = "tensorstore", version = "0.1.74", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "tensorstore", version = "0.1.76", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, { name = "tiktoken" }, { name = "torch" }, { name = "wandb" }, @@ -2576,12 +2361,43 @@ opencv = [ { name = "opencv-python-headless" }, ] +[[package]] +name = "ml-dtypes" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] +dependencies = [ + { name = "numpy", marker = "python_full_version >= '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/15/76f86faa0902836cc133939732f7611ace68cf54148487a99c539c272dc8/ml_dtypes-0.4.1.tar.gz", hash = "sha256:fad5f2de464fd09127e49b7fd1252b9006fb43d2edc1ff112d390c324af5ca7a", size = 692594, upload-time = "2024-09-13T19:07:11.624Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ba/1a/99e924f12e4b62139fbac87419698c65f956d58de0dbfa7c028fa5b096aa/ml_dtypes-0.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:827d3ca2097085cf0355f8fdf092b888890bb1b1455f52801a2d7756f056f54b", size = 405077, upload-time = "2024-09-13T19:06:57.538Z" }, + { url = "https://files.pythonhosted.org/packages/8f/8c/7b610bd500617854c8cc6ed7c8cfb9d48d6a5c21a1437a36a4b9bc8a3598/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:772426b08a6172a891274d581ce58ea2789cc8abc1c002a27223f314aaf894e7", size = 2181554, upload-time = "2024-09-13T19:06:59.196Z" }, + { url = "https://files.pythonhosted.org/packages/c7/c6/f89620cecc0581dc1839e218c4315171312e46c62a62da6ace204bda91c0/ml_dtypes-0.4.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:126e7d679b8676d1a958f2651949fbfa182832c3cd08020d8facd94e4114f3e9", size = 2160488, upload-time = "2024-09-13T19:07:03.131Z" }, + { url = "https://files.pythonhosted.org/packages/ae/11/a742d3c31b2cc8557a48efdde53427fd5f9caa2fa3c9c27d826e78a66f51/ml_dtypes-0.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:df0fb650d5c582a9e72bb5bd96cfebb2cdb889d89daff621c8fbc60295eba66c", size = 127462, upload-time = "2024-09-13T19:07:04.916Z" }, +] + [[package]] name = "ml-dtypes" version = "0.5.3" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 
'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "numpy" }, + { name = "numpy", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/78/a7/aad060393123cfb383956dca68402aff3db1e1caffd5764887ed5153f41b/ml_dtypes-0.5.3.tar.gz", hash = "sha256:95ce33057ba4d05df50b1f3cfefab22e351868a843b3b15a46c65836283670c9", size = 692316, upload-time = "2025-07-29T18:39:19.454Z" } wheels = [ @@ -2861,15 +2677,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351, upload-time = "2024-01-28T18:52:31.981Z" }, ] -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - [[package]] name = "myst-parser" version = "4.0.1" @@ -3049,8 +2856,8 @@ automodel = [ ] mcore = [ { name = "flash-attn" }, + { name = "megatron-bridge" }, { name = "megatron-core" }, - { name = "nemo-tron" }, { name = "transformer-engine", extra = ["pytorch"] }, { name = "vllm" }, ] @@ -3115,10 +2922,10 @@ requires-dist = [ { name = "mamba-ssm", marker = "extra == 'vllm'", git = "https://github.com/state-spaces/mamba.git?rev=2e16fc3062cdcd4ebef27a9aa4442676e1c7edf4" }, { name = "math-verify" }, 
{ name = "matplotlib" }, + { name = "megatron-bridge", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-Bridge-workspace" }, { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, { name = "mlflow" }, { name = "nemo-automodel", marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" }, - { name = "nemo-tron", marker = "extra == 'mcore'", editable = "3rdparty/NeMo-workspace" }, { name = "ninja" }, { name = "num2words", specifier = ">=0.5.14" }, { name = "num2words", marker = "extra == 'vllm'", specifier = ">=0.5.14" }, @@ -3137,7 +2944,7 @@ requires-dist = [ { name = "torch", specifier = "==2.7.1", index = "https://download.pytorch.org/whl/cu128" }, { name = "torchdata" }, { name = "torchvision", specifier = ">=0.22.0", index = "https://download.pytorch.org/whl/cu128" }, - { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'mcore'", specifier = "==2.3.0" }, + { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'mcore'", specifier = "==2.5.0" }, { name = "transformers", specifier = ">=4.51.0,<4.54.0" }, { name = "triton", index = "https://download.pytorch.org/whl/cu128" }, { name = "vllm", marker = "extra == 'automodel'", specifier = "==0.10.0" }, @@ -3179,62 +2986,6 @@ test = [ { name = "pytest-timeout" }, ] -[[package]] -name = "nemo-run" -version = "0.5.0rc0.dev0" -source = { git = "https://github.com/NVIDIA-NeMo/Run?rev=414f0077c648fde2c71bb1186e97ccbf96d6844c#414f0077c648fde2c71bb1186e97ccbf96d6844c" } -dependencies = [ - { name = "catalogue" }, - { name = "cryptography" }, - { name = "fabric" }, - { name = "fiddle" }, - { name = "inquirerpy" }, - { name = "jinja2" }, - { name = "networkx" }, - { name = "omegaconf" }, - { name = "packaging" }, - { name = "rich" }, - { name = "toml" }, - { name = "torchx" }, - { name = "typer" }, -] - -[[package]] -name = "nemo-tron" -source = { editable = "3rdparty/NeMo-workspace" } -dependencies = [ - { 
name = "braceexpand" }, - { name = "cloudpickle" }, - { name = "fiddle" }, - { name = "h5py" }, - { name = "hatchling" }, - { name = "ijson" }, - { name = "lightning" }, - { name = "matplotlib" }, - { name = "nemo-run" }, - { name = "onnx" }, - { name = "scikit-learn" }, - { name = "webdataset" }, - { name = "wget" }, -] - -[package.metadata] -requires-dist = [ - { name = "braceexpand" }, - { name = "cloudpickle" }, - { name = "fiddle" }, - { name = "h5py" }, - { name = "hatchling" }, - { name = "ijson" }, - { name = "lightning" }, - { name = "matplotlib" }, - { name = "nemo-run", git = "https://github.com/NVIDIA-NeMo/Run?rev=414f0077c648fde2c71bb1186e97ccbf96d6844c" }, - { name = "onnx" }, - { name = "scikit-learn" }, - { name = "webdataset" }, - { name = "wget" }, -] - [[package]] name = "networkx" version = "3.5" @@ -3357,40 +3108,18 @@ crc32c = [ [[package]] name = "numpy" -version = "2.2.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, - { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, - { url = 
"https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, - { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, - { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, - { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, - { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, - { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, - { url = 
"https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, - { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, - { url = "https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, - { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, - { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, - { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, - { url = 
"https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, - { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, - { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, - { url = 
"https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, - { url = "https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, - { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, - { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, - { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, - { url = 
"https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, - { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, - { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, +version = "1.26.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/6e/09db70a523a96d25e115e71cc56a6f9031e7b8cd166c1ac8438307c14058/numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", size = 15786129, upload-time = "2024-02-06T00:26:44.495Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/12/8f2020a8e8b8383ac0177dc9570aad031a3beb12e38847f7129bacd96228/numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", size = 20335901, upload-time = "2024-02-05T23:55:32.801Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/5b/ca6c8bd14007e5ca171c7c03102d17b4f4e0ceb53957e8c44343a9546dcc/numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", size = 13685868, upload-time = "2024-02-05T23:55:56.28Z" }, + { url = "https://files.pythonhosted.org/packages/79/f8/97f10e6755e2a7d027ca783f63044d5b1bc1ae7acb12afe6a9b4286eac17/numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", size = 13925109, upload-time = "2024-02-05T23:56:20.368Z" }, + { url = "https://files.pythonhosted.org/packages/0f/50/de23fde84e45f5c4fda2488c759b69990fd4512387a8632860f3ac9cd225/numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", size = 17950613, upload-time = "2024-02-05T23:56:56.054Z" }, + { url = "https://files.pythonhosted.org/packages/4c/0c/9c603826b6465e82591e05ca230dfc13376da512b25ccd0894709b054ed0/numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", size = 13572172, upload-time = "2024-02-05T23:57:21.56Z" }, + { url = "https://files.pythonhosted.org/packages/76/8c/2ba3902e1a0fc1c74962ea9bb33a534bb05984ad7ff9515bf8d07527cadd/numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", size = 17786643, upload-time = "2024-02-05T23:57:56.585Z" }, + { url = "https://files.pythonhosted.org/packages/28/4a/46d9e65106879492374999e76eb85f87b15328e06bd1550668f79f7b18c6/numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", size = 5677803, upload-time = "2024-02-05T23:58:08.963Z" }, + { url = 
"https://files.pythonhosted.org/packages/16/2e/86f24451c2d530c88daf997cb8d6ac622c1d40d19f5a031ed68a4b73a374/numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", size = 15517754, upload-time = "2024-02-05T23:58:36.364Z" }, ] [[package]] @@ -3497,11 +3226,11 @@ wheels = [ [[package]] name = "nvidia-ml-py" -version = "13.580.65" +version = "12.575.51" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8f/76/ff4a297c82b463ef17e7d0100d1bee5dbe6d1416721a9170e51ffcb8ecf3/nvidia_ml_py-13.580.65.tar.gz", hash = "sha256:7bf18b03c7d3658727011cf5f0c6c2155b36ce439e65359a0a4a906214f6a3c9", size = 47864, upload-time = "2025-08-05T16:11:49.71Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d2/4d/6f017814ed5ac28e08e1b8a62e3a258957da27582c89b7f8f8b15ac3d2e7/nvidia_ml_py-12.575.51.tar.gz", hash = "sha256:6490e93fea99eb4e966327ae18c6eec6256194c921f23459c8767aee28c54581", size = 46597, upload-time = "2025-05-06T20:46:37.962Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f9/96/88a5cb161c61cab2ee65b5aa61e612901fbcb1660024f0ccb26fcb02a17c/nvidia_ml_py-13.580.65-py3-none-any.whl", hash = "sha256:f0c65306ed999d2d4ff793918bfd17d1e30895d1c4606413ef95a0ea42460792", size = 48866, upload-time = "2025-08-05T16:11:48.387Z" }, + { url = "https://files.pythonhosted.org/packages/db/24/552ebea28f0570b9e65e62b50287a273804c9f997cc1c2dcd4e2d64b9e7d/nvidia_ml_py-12.575.51-py3-none-any.whl", hash = "sha256:eb8641800d98ce40a22f479873f34b482e214a7e80349c63be51c3919845446e", size = 47547, upload-time = "2025-05-06T20:46:36.457Z" }, ] [[package]] @@ -3531,6 +3260,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/b1/fc2f468d140ef58e90fac584759d0cc449db9bc4f64668cdff750ef38fef/nvidia_modelopt-0.33.1-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:ef78a98901890f265596ec413dffac177d4a1865201d89a14f29f4fa0cf8e710", size = 751683, 
upload-time = "2025-08-12T18:36:59.964Z" }, ] +[package.optional-dependencies] +onnx = [ + { name = "cppimport" }, + { name = "cupy-cuda12x", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin'" }, + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "onnx" }, + { name = "onnx-graphsurgeon" }, + { name = "onnxconverter-common" }, + { name = "onnxruntime", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin'" }, + { name = "onnxruntime-gpu", version = "1.20.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform == 'win32'" }, + { name = "onnxruntime-gpu", version = "1.22.0", source = { registry = "https://pypi.org/simple" }, marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "onnxscript" }, + { name = "polygraphy" }, +] + [[package]] name = "nvidia-modelopt-core" version = "0.33.1" @@ -3564,6 +3309,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/cd/0e8c51b2ae3a58f054f2e7fe91b82d201abfb30167f2431e9bd92d532f42/nvidia_nvtx_cu12-12.8.55-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dd0780f1a55c21d8e06a743de5bd95653de630decfff40621dbde78cc307102", size = 89896, upload-time = "2025-01-23T17:50:44.487Z" }, ] +[[package]] +name = "nvidia-resiliency-ext" +version = "0.4.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "nvidia-ml-py" }, + { name = "packaging" }, + { name = "psutil" }, + { name = "pynvml" }, + { name = "pyyaml" }, + { name = "torch" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/70/05/38d491962273c7905708762279f440520eb79f3c00b67a023497215ad023/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_aarch64.whl", hash = "sha256:b3bd5f01535574b16d0f38bca6e39afe3806c4a2896eee1b321cd944e00025a7", size = 444570, upload-time = "2025-07-17T03:50:58.877Z" }, + { url = "https://files.pythonhosted.org/packages/18/8b/4cb8aa2bbdf3705d3034c3f3dacdadb03b3b7dd3dc7f5200e64663fb477f/nvidia_resiliency_ext-0.4.1-cp312-cp312-manylinux_2_31_x86_64.whl", hash = "sha256:ca9f8de465af345952bedbea53c90c0e2323d88cfd830ded0e806fad91845c0e", size = 450280, upload-time = "2025-07-17T03:49:55.327Z" }, +] + [[package]] name = "nvidia-sphinx-theme" version = "0.0.8" @@ -3614,22 +3377,142 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/3d/60/e56e8ec44ed34006e6d4a73c92a04d9eea6163cc12440e35045aec069175/onnx-1.18.0.tar.gz", hash = "sha256:3d8dbf9e996629131ba3aa1afd1d8239b660d1f830c6688dd7e03157cccd6b9c", size = 12563009, upload-time = "2025-05-12T22:03:09.626Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/fe/16228aca685392a7114625b89aae98b2dc4058a47f0f467a376745efe8d0/onnx-1.18.0-cp312-cp312-macosx_12_0_universal2.whl", hash = "sha256:521bac578448667cbb37c50bf05b53c301243ede8233029555239930996a625b", size = 18285770, upload-time = "2025-05-12T22:02:26.116Z" }, { url = "https://files.pythonhosted.org/packages/1e/77/ba50a903a9b5e6f9be0fa50f59eb2fca4a26ee653375408fbc72c3acbf9f/onnx-1.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4da451bf1c5ae381f32d430004a89f0405bc57a8471b0bddb6325a5b334aa40", size = 17421291, upload-time = "2025-05-12T22:02:29.645Z" }, { url = "https://files.pythonhosted.org/packages/11/23/25ec2ba723ac62b99e8fed6d7b59094dadb15e38d4c007331cc9ae3dfa5f/onnx-1.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99afac90b4cdb1471432203c3c1f74e16549c526df27056d39f41a9a47cfb4af", size = 17584084, upload-time 
= "2025-05-12T22:02:32.789Z" }, { url = "https://files.pythonhosted.org/packages/6a/4d/2c253a36070fb43f340ff1d2c450df6a9ef50b938adcd105693fee43c4ee/onnx-1.18.0-cp312-cp312-win32.whl", hash = "sha256:ee159b41a3ae58d9c7341cf432fc74b96aaf50bd7bb1160029f657b40dc69715", size = 15734892, upload-time = "2025-05-12T22:02:35.527Z" }, { url = "https://files.pythonhosted.org/packages/e8/92/048ba8fafe6b2b9a268ec2fb80def7e66c0b32ab2cae74de886981f05a27/onnx-1.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:102c04edc76b16e9dfeda5a64c1fccd7d3d2913b1544750c01d38f1ac3c04e05", size = 15850336, upload-time = "2025-05-12T22:02:38.545Z" }, { url = "https://files.pythonhosted.org/packages/a1/66/bbc4ffedd44165dcc407a51ea4c592802a5391ce3dc94aa5045350f64635/onnx-1.18.0-cp312-cp312-win_arm64.whl", hash = "sha256:911b37d724a5d97396f3c2ef9ea25361c55cbc9aa18d75b12a52b620b67145af", size = 15823802, upload-time = "2025-05-12T22:02:42.037Z" }, - { url = "https://files.pythonhosted.org/packages/45/da/9fb8824513fae836239276870bfcc433fa2298d34ed282c3a47d3962561b/onnx-1.18.0-cp313-cp313-macosx_12_0_universal2.whl", hash = "sha256:030d9f5f878c5f4c0ff70a4545b90d7812cd6bfe511de2f3e469d3669c8cff95", size = 18285906, upload-time = "2025-05-12T22:02:45.01Z" }, { url = "https://files.pythonhosted.org/packages/05/e8/762b5fb5ed1a2b8e9a4bc5e668c82723b1b789c23b74e6b5a3356731ae4e/onnx-1.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8521544987d713941ee1e591520044d35e702f73dc87e91e6d4b15a064ae813d", size = 17421486, upload-time = "2025-05-12T22:02:48.467Z" }, { url = "https://files.pythonhosted.org/packages/12/bb/471da68df0364f22296456c7f6becebe0a3da1ba435cdb371099f516da6e/onnx-1.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c137eecf6bc618c2f9398bcc381474b55c817237992b169dfe728e169549e8f", size = 17583581, upload-time = "2025-05-12T22:02:51.784Z" }, { url = 
"https://files.pythonhosted.org/packages/76/0d/01a95edc2cef6ad916e04e8e1267a9286f15b55c90cce5d3cdeb359d75d6/onnx-1.18.0-cp313-cp313-win32.whl", hash = "sha256:6c093ffc593e07f7e33862824eab9225f86aa189c048dd43ffde207d7041a55f", size = 15734621, upload-time = "2025-05-12T22:02:54.62Z" }, { url = "https://files.pythonhosted.org/packages/64/95/253451a751be32b6173a648b68f407188009afa45cd6388780c330ff5d5d/onnx-1.18.0-cp313-cp313-win_amd64.whl", hash = "sha256:230b0fb615e5b798dc4a3718999ec1828360bc71274abd14f915135eab0255f1", size = 15850472, upload-time = "2025-05-12T22:02:57.54Z" }, { url = "https://files.pythonhosted.org/packages/0a/b1/6fd41b026836df480a21687076e0f559bc3ceeac90f2be8c64b4a7a1f332/onnx-1.18.0-cp313-cp313-win_arm64.whl", hash = "sha256:6f91930c1a284135db0f891695a263fc876466bf2afbd2215834ac08f600cfca", size = 15823808, upload-time = "2025-05-12T22:03:00.305Z" }, - { url = "https://files.pythonhosted.org/packages/70/f3/499e53dd41fa7302f914dd18543da01e0786a58b9a9d347497231192001f/onnx-1.18.0-cp313-cp313t-macosx_12_0_universal2.whl", hash = "sha256:2f4d37b0b5c96a873887652d1cbf3f3c70821b8c66302d84b0f0d89dd6e47653", size = 18316526, upload-time = "2025-05-12T22:03:03.691Z" }, { url = "https://files.pythonhosted.org/packages/84/dd/6abe5d7bd23f5ed3ade8352abf30dff1c7a9e97fc1b0a17b5d7c726e98a9/onnx-1.18.0-cp313-cp313t-win_amd64.whl", hash = "sha256:a69afd0baa372162948b52c13f3aa2730123381edf926d7ef3f68ca7cec6d0d0", size = 15865055, upload-time = "2025-05-12T22:03:06.663Z" }, ] +[[package]] +name = "onnx-graphsurgeon" +version = "0.5.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "onnx" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/71/53/98334c4f64a9e289a8cb48f5e7966b8ff015414d0bf26587cf46d764f1d8/onnx_graphsurgeon-0.5.8-py2.py3-none-any.whl", hash = "sha256:6f611ea29a8e4740fbab1aae52bf4c40b8b9918f8459058d20b99acc79fce121", size = 57923, upload-time = "2025-04-10T18:49:24.483Z" }, +] + 
+[[package]] +name = "onnx-ir" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy" }, + { name = "onnx" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/14/4a003926218f8edee6da19546f69a1831b74cdd993eaf5ff50a2fb168e70/onnx_ir-0.1.7.tar.gz", hash = "sha256:4734b7587807ca657158b042c138879c3f454756fae74e949f6c99f0107d8df6", size = 107944, upload-time = "2025-08-22T15:01:16.383Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/cc/35e8490072f61aa54221742b4c9a0c947ef78ead5034481ca9ac655024ef/onnx_ir-0.1.7-py3-none-any.whl", hash = "sha256:8a0441909676f1ab6b22186d79f8d0faf8739177f50d15baeac88e7e1255aae8", size = 124382, upload-time = "2025-08-22T15:01:15.063Z" }, +] + +[[package]] +name = "onnxconverter-common" +version = "1.15.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "onnx" }, + { name = "packaging" }, + { name = "protobuf" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/ac/c3ff41cc2d36c8caab51bffa9185ea64019f161850b9641eb0409b243ae1/onnxconverter_common-1.15.0-py2.py3-none-any.whl", hash = "sha256:24579ed1bb3c10beca39a4517d196c17341911be5bd09bd0e6050a7379a2a7d9", size = 89640, upload-time = "2025-07-01T16:42:56.968Z" }, +] + +[[package]] +name = "onnxruntime" +version = "1.22.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "coloredlogs", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, + { name = "flatbuffers", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" 
}, + { name = "numpy", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, + { name = "packaging", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, + { name = "protobuf", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, + { name = "sympy", marker = "platform_machine == 'aarch64' or sys_platform == 'darwin' or sys_platform == 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/29/e5/00b099b4d4f6223b610421080d0eed9327ef9986785c9141819bbba0d396/onnxruntime-1.22.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:984cea2a02fcc5dfea44ade9aca9fe0f7a8a2cd6f77c258fc4388238618f3928", size = 14473861, upload-time = "2025-07-10T19:15:42.911Z" }, + { url = "https://files.pythonhosted.org/packages/d2/a6/444291524cb52875b5de980a6e918072514df63a57a7120bf9dfae3aeed1/onnxruntime-1.22.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:460487d83b7056ba98f1f7bac80287224c31d8149b15712b0d6f5078fcc33d0f", size = 14474014, upload-time = "2025-07-10T19:15:53.991Z" }, + { url = "https://files.pythonhosted.org/packages/52/8c/02af24ee1c8dce4e6c14a1642a7a56cebe323d2fa01d9a360a638f7e4b75/onnxruntime-1.22.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:33a7980bbc4b7f446bac26c3785652fe8730ed02617d765399e89ac7d44e0f7d", size = 14479333, upload-time = "2025-07-10T19:16:00.544Z" }, +] + +[[package]] +name = "onnxruntime-gpu" +version = "1.20.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", +] +dependencies = [ + { name = "coloredlogs", marker = "sys_platform == 'win32'" }, + { name = "flatbuffers", marker = "sys_platform == 'win32'" }, + { name = "numpy", marker = "sys_platform == 
'win32'" }, + { name = "packaging", marker = "sys_platform == 'win32'" }, + { name = "protobuf", marker = "sys_platform == 'win32'" }, + { name = "sympy", marker = "sys_platform == 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/d3/bf1b185b900533261a417cb87e3e0766335ba764167fbf1655ff600d9de8/onnxruntime_gpu-1.20.0-cp312-cp312-win_amd64.whl", hash = "sha256:7b217354ce20911405b9599e73c9fbbfe63047a95289efe72f53e611cf1688af", size = 279686161, upload-time = "2024-11-01T17:02:21.549Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a7/d773619a55fbcd749854327970084ad31b744e8090eaa5d85601b38b7a9a/onnxruntime_gpu-1.20.0-cp313-cp313-win_amd64.whl", hash = "sha256:add0624b7fc5f554bf27c9220413230962900b1ff97baccbbaf51aaeb9938a59", size = 279686205, upload-time = "2024-11-01T17:02:56.318Z" }, +] + +[[package]] +name = "onnxruntime-gpu" +version = "1.22.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", +] +dependencies = [ + { name = "coloredlogs", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "flatbuffers", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "numpy", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "packaging", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { 
name = "protobuf", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, + { name = "sympy", marker = "platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'win32'" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/5c/3f9700ba277d52c121dd2cebc8a672fb60b53e888972fc6682b6692a766c/onnxruntime_gpu-1.22.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:86b064c8f6cbe6da03f51f46351237d985f8fd5eb907d3f9997ea91881131a13", size = 283199528, upload-time = "2025-05-09T19:39:54.489Z" }, + { url = "https://files.pythonhosted.org/packages/ae/26/35efe9dae012f453f2f7698dec3604368ce91ee2a0464336d2284fe02e3b/onnxruntime_gpu-1.22.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c3e635792931c5edf48a6a44b8daf4f74a9458e2d60245d24d91e29b6c1c7aa5", size = 283205630, upload-time = "2025-05-09T19:40:12.749Z" }, + { url = "https://files.pythonhosted.org/packages/d7/ab/943c659cded9288519c67e6d5827973762207d19035972c703a1fefd032c/onnxruntime_gpu-1.22.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d1559033601d71023d72a8e279b2575a104de5f46e136f87534206aa2044eb1c", size = 283210584, upload-time = "2025-05-09T19:40:27.372Z" }, +] + +[[package]] +name = "onnxscript" +version = "0.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy" }, + { name = "onnx" }, + { name = "onnx-ir" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a8/9f/45aed9951d3fa50a97b910487186ef9c15ad08d3c9cb3605aabd99f65f92/onnxscript-0.4.0.tar.gz", hash = 
"sha256:de618eeb6e0c57f5a70f85909ab1f829cbb2053ad55f8f2fcc2701fa29b7adfc", size = 567393, upload-time = "2025-08-22T21:05:46.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ec/b6/c1ca749dfe58c23da542b1c71c334a3faa7191647b3335623870ef371e93/onnxscript-0.4.0-py3-none-any.whl", hash = "sha256:3d41f5b190bab9f1a4ace6075c6960ad676719766240b3450f292173bde90242", size = 660871, upload-time = "2025-08-22T21:05:48.332Z" }, +] + [[package]] name = "openai" version = "1.90.0" @@ -3674,19 +3557,19 @@ wheels = [ [[package]] name = "opencv-python-headless" -version = "4.12.0.88" +version = "4.11.0.86" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a4/63/6861102ec149c3cd298f4d1ea7ce9d6adbc7529221606ff1dab991a19adb/opencv-python-headless-4.12.0.88.tar.gz", hash = "sha256:cfdc017ddf2e59b6c2f53bc12d74b6b0be7ded4ec59083ea70763921af2b6c09", size = 95379675, upload-time = "2025-07-07T09:21:06.815Z" } +sdist = { url = "https://files.pythonhosted.org/packages/36/2f/5b2b3ba52c864848885ba988f24b7f105052f68da9ab0e693cc7c25b0b30/opencv-python-headless-4.11.0.86.tar.gz", hash = "sha256:996eb282ca4b43ec6a3972414de0e2331f5d9cda2b41091a49739c19fb843798", size = 95177929, upload-time = "2025-01-16T13:53:40.22Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/7d/414e243c5c8216a5277afd104a319cc1291c5e23f5eeef512db5629ee7f4/opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:1e58d664809b3350c1123484dd441e1667cd7bed3086db1b9ea1b6f6cb20b50e", size = 37877864, upload-time = "2025-07-07T09:14:41.693Z" }, - { url = "https://files.pythonhosted.org/packages/05/14/7e162714beed1cd5e7b5eb66fcbcba2f065c51b1d9da2463024c84d2f7c0/opencv_python_headless-4.12.0.88-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:365bb2e486b50feffc2d07a405b953a8f3e8eaa63865bc650034e5c71e7a5154", size = 57326608, upload-time = "2025-07-07T09:14:51.885Z" }, - { url 
= "https://files.pythonhosted.org/packages/69/4e/116720df7f1f7f3b59abc608ca30fbec9d2b3ae810afe4e4d26483d9dfa0/opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:aeb4b13ecb8b4a0beb2668ea07928160ea7c2cd2d9b5ef571bbee6bafe9cc8d0", size = 33145800, upload-time = "2025-07-07T09:15:00.367Z" }, - { url = "https://files.pythonhosted.org/packages/89/53/e19c21e0c4eb1275c3e2c97b081103b6dfb3938172264d283a519bf728b9/opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:236c8df54a90f4d02076e6f9c1cc763d794542e886c576a6fee46ec8ff75a7a9", size = 54023419, upload-time = "2025-07-07T09:15:10.164Z" }, - { url = "https://files.pythonhosted.org/packages/bf/9c/a76fd5414de6ec9f21f763a600058a0c3e290053cea87e0275692b1375c0/opencv_python_headless-4.12.0.88-cp37-abi3-win32.whl", hash = "sha256:fde2cf5c51e4def5f2132d78e0c08f9c14783cd67356922182c6845b9af87dbd", size = 30225230, upload-time = "2025-07-07T09:15:17.045Z" }, - { url = "https://files.pythonhosted.org/packages/f2/35/0858e9e71b36948eafbc5e835874b63e515179dc3b742cbe3d76bc683439/opencv_python_headless-4.12.0.88-cp37-abi3-win_amd64.whl", hash = "sha256:86b413bdd6c6bf497832e346cd5371995de148e579b9774f8eba686dee3f5528", size = 38923559, upload-time = "2025-07-07T09:15:25.229Z" }, + { url = "https://files.pythonhosted.org/packages/dc/53/2c50afa0b1e05ecdb4603818e85f7d174e683d874ef63a6abe3ac92220c8/opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:48128188ade4a7e517237c8e1e11a9cdf5c282761473383e77beb875bb1e61ca", size = 37326460, upload-time = "2025-01-16T13:52:57.015Z" }, + { url = "https://files.pythonhosted.org/packages/3b/43/68555327df94bb9b59a1fd645f63fafb0762515344d2046698762fc19d58/opencv_python_headless-4.11.0.86-cp37-abi3-macosx_13_0_x86_64.whl", hash = "sha256:a66c1b286a9de872c343ee7c3553b084244299714ebb50fbdcd76f07ebbe6c81", size = 56723330, upload-time = "2025-01-16T13:55:45.731Z" }, + { 
url = "https://files.pythonhosted.org/packages/45/be/1438ce43ebe65317344a87e4b150865c5585f4c0db880a34cdae5ac46881/opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6efabcaa9df731f29e5ea9051776715b1bdd1845d7c9530065c7951d2a2899eb", size = 29487060, upload-time = "2025-01-16T13:51:59.625Z" }, + { url = "https://files.pythonhosted.org/packages/dd/5c/c139a7876099916879609372bfa513b7f1257f7f1a908b0bdc1c2328241b/opencv_python_headless-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e0a27c19dd1f40ddff94976cfe43066fbbe9dfbb2ec1907d66c19caef42a57b", size = 49969856, upload-time = "2025-01-16T13:53:29.654Z" }, + { url = "https://files.pythonhosted.org/packages/95/dd/ed1191c9dc91abcc9f752b499b7928aacabf10567bb2c2535944d848af18/opencv_python_headless-4.11.0.86-cp37-abi3-win32.whl", hash = "sha256:f447d8acbb0b6f2808da71fddd29c1cdd448d2bc98f72d9bb78a7a898fc9621b", size = 29324425, upload-time = "2025-01-16T13:52:49.048Z" }, + { url = "https://files.pythonhosted.org/packages/86/8a/69176a64335aed183529207ba8bc3d329c2999d852b4f3818027203f50e6/opencv_python_headless-4.11.0.86-cp37-abi3-win_amd64.whl", hash = "sha256:6c304df9caa7a6a5710b91709dd4786bf20a74d57672b3c31f7033cc638174ca", size = 39402386, upload-time = "2025-01-16T13:52:56.418Z" }, ] [[package]] @@ -3796,21 +3679,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044, upload-time = "2025-07-07T19:19:39.999Z" }, ] -[[package]] -name = "paramiko" -version = "4.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "bcrypt" }, - { name = "cryptography" }, - { name = "invoke" }, - { name = "pynacl" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/1f/e7/81fdcbc7f190cdb058cffc9431587eb289833bdd633e2002455ca9bb13d4/paramiko-4.0.0.tar.gz", hash = "sha256:6a25f07b380cc9c9a88d2b920ad37167ac4667f8d9886ccebd8f90f654b5d69f", size = 1630743, upload-time = "2025-08-04T01:02:03.711Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a9/90/a744336f5af32c433bd09af7854599682a383b37cfd78f7de263de6ad6cb/paramiko-4.0.0-py3-none-any.whl", hash = "sha256:0e20e00ac666503bf0b4eda3b6d833465a2b7aff2e2b3d79a8bba5ef144ee3b9", size = 223932, upload-time = "2025-08-04T01:02:02.029Z" }, -] - [[package]] name = "partial-json-parser" version = "0.2.1.1.post6" @@ -3850,15 +3718,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/fe/a2da1627aa9cb6310b6034598363bd26ac301c4a99d21f415b1b2855891e/peft-0.17.1-py3-none-any.whl", hash = "sha256:3d129d64def3d74779c32a080d2567e5f7b674e77d546e3585138216d903f99e", size = 504896, upload-time = "2025-08-21T09:25:18.974Z" }, ] -[[package]] -name = "pfzy" -version = "0.3.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d9/5a/32b50c077c86bfccc7bed4881c5a2b823518f5450a30e639db5d3711952e/pfzy-0.3.4.tar.gz", hash = "sha256:717ea765dd10b63618e7298b2d98efd819e0b30cd5905c9707223dceeb94b3f1", size = 8396, upload-time = "2022-01-28T02:26:17.946Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8c/d7/8ff98376b1acc4503253b685ea09981697385ce344d4e3935c2af49e044d/pfzy-0.3.4-py3-none-any.whl", hash = "sha256:5f50d5b2b3207fa72e7ec0ef08372ef652685470974a107d0d4999fc5a903a96", size = 8537, upload-time = "2022-01-28T02:26:16.047Z" }, -] - [[package]] name = "pillow" version = "11.3.0" @@ -3956,6 +3815,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = 
"2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "polygraphy" +version = "0.49.26" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/25/e92af58cca8f8f8833d3346506a186938148b7156671c8154d94b8985d35/polygraphy-0.49.26-py2.py3-none-any.whl", hash = "sha256:5787d218a133163b42c92800134afaba6b266127646efb77416a9530137d1a45", size = 372849, upload-time = "2025-07-16T18:26:21.446Z" }, +] + [[package]] name = "pre-commit" version = "4.3.0" @@ -3994,18 +3861,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/27/72/0824c18f3bc75810f55dacc2dd933f6ec829771180245ae3cc976195dec0/prometheus_fastapi_instrumentator-7.1.0-py3-none-any.whl", hash = "sha256:978130f3c0bb7b8ebcc90d35516a6fe13e02d2eb358c8f83887cdef7020c31e9", size = 19296, upload-time = "2025-03-19T19:35:04.323Z" }, ] -[[package]] -name = "prompt-toolkit" -version = "3.0.51" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/bb/6e/9d084c929dfe9e3bfe0c6a47e31f78a25c54627d64a66e884a8bf5474f1c/prompt_toolkit-3.0.51.tar.gz", hash = "sha256:931a162e3b27fc90c86f1b48bb1fb2c528c2761475e57c9c06de13311c7b54ed", size = 428940, upload-time = "2025-04-15T09:18:47.731Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/4f/5249960887b1fbe561d9ff265496d170b55a735b76724f10ef19f9e40716/prompt_toolkit-3.0.51-py3-none-any.whl", hash = "sha256:52742911fde84e2d423e2f9a4cf1de7d7ac4e51958f648d9540e0fb8db077b07", size = 387810, upload-time = "2025-04-15T09:18:44.753Z" }, -] - [[package]] name = "propcache" version = "0.3.2" @@ -4463,23 +4318,15 @@ wheels = [ ] [[package]] -name = "pynacl" -version = "1.5.0" +name = "pynvml" +version = "12.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cffi" }, + { name = "nvidia-ml-py" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/a7/22/27582568be639dfe22ddb3902225f91f2f17ceff88ce80e4db396c8986da/PyNaCl-1.5.0.tar.gz", hash = "sha256:8ac7448f09ab85811607bdd21ec2464495ac8b7c66d146bf545b0f08fb9220ba", size = 3392854, upload-time = "2022-01-07T22:05:41.134Z" } +sdist = { url = "https://files.pythonhosted.org/packages/26/6f/6b5880ed0239e85b9a39aed103b65b2ef81425beef9f45e5c035bf008330/pynvml-12.0.0.tar.gz", hash = "sha256:299ce2451a6a17e6822d6faee750103e25b415f06f59abb8db65d30f794166f5", size = 33636, upload-time = "2024-12-02T15:04:36.631Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/75/0b8ede18506041c0bf23ac4d8e2971b4161cd6ce630b177d0a08eb0d8857/PyNaCl-1.5.0-cp36-abi3-macosx_10_10_universal2.whl", hash = "sha256:401002a4aaa07c9414132aaed7f6836ff98f59277a234704ff66878c2ee4a0d1", size = 349920, upload-time = "2022-01-07T22:05:49.156Z" }, - { url = "https://files.pythonhosted.org/packages/59/bb/fddf10acd09637327a97ef89d2a9d621328850a72f1fdc8c08bdf72e385f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:52cb72a79269189d4e0dc537556f4740f7f0a9ec41c1322598799b0bdad4ef92", size = 601722, upload-time = "2022-01-07T22:05:50.989Z" }, - { url = "https://files.pythonhosted.org/packages/5d/70/87a065c37cca41a75f2ce113a5a2c2aa7533be648b184ade58971b5f7ccc/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a36d4a9dda1f19ce6e03c9a784a2921a4b726b02e1c736600ca9c22029474394", size = 680087, upload-time = "2022-01-07T22:05:52.539Z" }, - { url = "https://files.pythonhosted.org/packages/ee/87/f1bb6a595f14a327e8285b9eb54d41fef76c585a0edef0a45f6fc95de125/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:0c84947a22519e013607c9be43706dd42513f9e6ae5d39d3613ca1e142fba44d", size = 856678, upload-time = "2022-01-07T22:05:54.251Z" }, - { url = 
"https://files.pythonhosted.org/packages/66/28/ca86676b69bf9f90e710571b67450508484388bfce09acf8a46f0b8c785f/PyNaCl-1.5.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06b8f6fa7f5de8d5d2f7573fe8c863c051225a27b61e6860fd047b1775807858", size = 1133660, upload-time = "2022-01-07T22:05:56.056Z" }, - { url = "https://files.pythonhosted.org/packages/3d/85/c262db650e86812585e2bc59e497a8f59948a005325a11bbbc9ecd3fe26b/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:a422368fc821589c228f4c49438a368831cb5bbc0eab5ebe1d7fac9dded6567b", size = 663824, upload-time = "2022-01-07T22:05:57.434Z" }, - { url = "https://files.pythonhosted.org/packages/fd/1a/cc308a884bd299b651f1633acb978e8596c71c33ca85e9dc9fa33a5399b9/PyNaCl-1.5.0-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:61f642bf2378713e2c2e1de73444a3778e5f0a38be6fee0fe532fe30060282ff", size = 1117912, upload-time = "2022-01-07T22:05:58.665Z" }, - { url = "https://files.pythonhosted.org/packages/25/2d/b7df6ddb0c2a33afdb358f8af6ea3b8c4d1196ca45497dd37a56f0c122be/PyNaCl-1.5.0-cp36-abi3-win32.whl", hash = "sha256:e46dae94e34b085175f8abb3b0aaa7da40767865ac82c928eeb9e57e1ea8a543", size = 204624, upload-time = "2022-01-07T22:06:00.085Z" }, - { url = "https://files.pythonhosted.org/packages/5e/22/d3db169895faaf3e2eda892f005f433a62db2decbcfbc2f61e6517adfa87/PyNaCl-1.5.0-cp36-abi3-win_amd64.whl", hash = "sha256:20f42270d27e1b6a29f54032090b972d97f0a1b0948cc52392041ef7831fee93", size = 212141, upload-time = "2022-01-07T22:06:01.861Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/f7cf07a65a96dd11d71f346f9c2863accdd4784da83af7181b067d556cbc/pynvml-12.0.0-py3-none-any.whl", hash = "sha256:fdff84b62a27dbe98e08e1a647eb77342bef1aebe0878bcd15e99a83fcbecb9e", size = 26560, upload-time = "2024-12-02T15:04:35.047Z" }, ] [[package]] @@ -4492,16 +4339,12 @@ wheels = [ ] [[package]] -name = "pyre-extensions" -version = "0.0.32" +name = "pyreadline3" +version = "3.5.4" source = { registry = 
"https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, - { name = "typing-inspect" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a7/53/5bc2532536e921c48366ad1047c1344ccef6afa5e84053f0f6e20a453767/pyre_extensions-0.0.32.tar.gz", hash = "sha256:5396715f14ea56c4d5fd0a88c57ca7e44faa468f905909edd7de4ad90ed85e55", size = 10852, upload-time = "2024-11-22T19:26:44.152Z" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/49/4cea918a08f02817aabae639e3d0ac046fef9f9180518a3ad394e22da148/pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7", size = 99839, upload-time = "2024-09-19T02:40:10.062Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a4/7a/9812cb8be9828ab688203c5ac5f743c60652887f0c00995a6f6f19f912bd/pyre_extensions-0.0.32-py3-none-any.whl", hash = "sha256:a63ba6883ab02f4b1a9f372ed4eb4a2f4c6f3d74879aa2725186fdfcfe3e5c68", size = 12766, upload-time = "2024-11-22T19:26:42.465Z" }, + { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, ] [[package]] @@ -4637,25 +4480,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546, upload-time = "2024-12-16T19:45:44.423Z" }, ] -[[package]] -name = "pytorch-lightning" -version = "2.5.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fsspec", extra = ["http"] }, - { name = "lightning-utilities" }, - { name = "packaging" }, - { name = "pyyaml" }, - { name = "torch" }, - { name = "torchmetrics" }, - { name = "tqdm" }, - { name = 
"typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/32/a8/31fe79bf96dab33cee5537ed6f08230ed6f032834bb4ff529cc487fb40e8/pytorch_lightning-2.5.3.tar.gz", hash = "sha256:65f4eee774ee1adba181aacacffb9f677fe5c5f9fd3d01a95f603403f940be6a", size = 639897, upload-time = "2025-08-13T20:29:39.161Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/a2/5f2b7b40ec5213db5282e98dd32fd419fe5b73b5b53895dfff56fe12fed0/pytorch_lightning-2.5.3-py3-none-any.whl", hash = "sha256:7476bd36282d9253dda175b9263b07942489d70ad90bbd1bc0a59c46e012f353", size = 828186, upload-time = "2025-08-13T20:29:37.41Z" }, -] - [[package]] name = "pytz" version = "2025.2" @@ -4707,30 +4531,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, ] -[[package]] -name = "pyyaml-ft" -version = "8.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5e/eb/5a0d575de784f9a1f94e2b1288c6886f13f34185e13117ed530f32b6f8a8/pyyaml_ft-8.0.0.tar.gz", hash = "sha256:0c947dce03954c7b5d38869ed4878b2e6ff1d44b08a0d84dc83fdad205ae39ab", size = 141057, upload-time = "2025-06-10T15:32:15.613Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/68/ba/a067369fe61a2e57fb38732562927d5bae088c73cb9bb5438736a9555b29/pyyaml_ft-8.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8c1306282bc958bfda31237f900eb52c9bedf9b93a11f82e1aab004c9a5657a6", size = 187027, upload-time = "2025-06-10T15:31:48.722Z" }, - { url = "https://files.pythonhosted.org/packages/ad/c5/a3d2020ce5ccfc6aede0d45bcb870298652ac0cf199f67714d250e0cdf39/pyyaml_ft-8.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30c5f1751625786c19de751e3130fc345ebcba6a86f6bddd6e1285342f4bbb69", size = 
176146, upload-time = "2025-06-10T15:31:50.584Z" }, - { url = "https://files.pythonhosted.org/packages/e3/bb/23a9739291086ca0d3189eac7cd92b4d00e9fdc77d722ab610c35f9a82ba/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3fa992481155ddda2e303fcc74c79c05eddcdbc907b888d3d9ce3ff3e2adcfb0", size = 746792, upload-time = "2025-06-10T15:31:52.304Z" }, - { url = "https://files.pythonhosted.org/packages/5f/c2/e8825f4ff725b7e560d62a3609e31d735318068e1079539ebfde397ea03e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cec6c92b4207004b62dfad1f0be321c9f04725e0f271c16247d8b39c3bf3ea42", size = 786772, upload-time = "2025-06-10T15:31:54.712Z" }, - { url = "https://files.pythonhosted.org/packages/35/be/58a4dcae8854f2fdca9b28d9495298fd5571a50d8430b1c3033ec95d2d0e/pyyaml_ft-8.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06237267dbcab70d4c0e9436d8f719f04a51123f0ca2694c00dd4b68c338e40b", size = 778723, upload-time = "2025-06-10T15:31:56.093Z" }, - { url = "https://files.pythonhosted.org/packages/86/ed/fed0da92b5d5d7340a082e3802d84c6dc9d5fa142954404c41a544c1cb92/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:8a7f332bc565817644cdb38ffe4739e44c3e18c55793f75dddb87630f03fc254", size = 758478, upload-time = "2025-06-10T15:31:58.314Z" }, - { url = "https://files.pythonhosted.org/packages/f0/69/ac02afe286275980ecb2dcdc0156617389b7e0c0a3fcdedf155c67be2b80/pyyaml_ft-8.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7d10175a746be65f6feb86224df5d6bc5c049ebf52b89a88cf1cd78af5a367a8", size = 799159, upload-time = "2025-06-10T15:31:59.675Z" }, - { url = "https://files.pythonhosted.org/packages/4e/ac/c492a9da2e39abdff4c3094ec54acac9747743f36428281fb186a03fab76/pyyaml_ft-8.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:58e1015098cf8d8aec82f360789c16283b88ca670fe4275ef6c48c5e30b22a96", size = 158779, upload-time = "2025-06-10T15:32:01.029Z" }, - { url 
= "https://files.pythonhosted.org/packages/5d/9b/41998df3298960d7c67653669f37710fa2d568a5fc933ea24a6df60acaf6/pyyaml_ft-8.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:e64fa5f3e2ceb790d50602b2fd4ec37abbd760a8c778e46354df647e7c5a4ebb", size = 191331, upload-time = "2025-06-10T15:32:02.602Z" }, - { url = "https://files.pythonhosted.org/packages/0f/16/2710c252ee04cbd74d9562ebba709e5a284faeb8ada88fcda548c9191b47/pyyaml_ft-8.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:8d445bf6ea16bb93c37b42fdacfb2f94c8e92a79ba9e12768c96ecde867046d1", size = 182879, upload-time = "2025-06-10T15:32:04.466Z" }, - { url = "https://files.pythonhosted.org/packages/9a/40/ae8163519d937fa7bfa457b6f78439cc6831a7c2b170e4f612f7eda71815/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c56bb46b4fda34cbb92a9446a841da3982cdde6ea13de3fbd80db7eeeab8b49", size = 811277, upload-time = "2025-06-10T15:32:06.214Z" }, - { url = "https://files.pythonhosted.org/packages/f9/66/28d82dbff7f87b96f0eeac79b7d972a96b4980c1e445eb6a857ba91eda00/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dab0abb46eb1780da486f022dce034b952c8ae40753627b27a626d803926483b", size = 831650, upload-time = "2025-06-10T15:32:08.076Z" }, - { url = "https://files.pythonhosted.org/packages/e8/df/161c4566facac7d75a9e182295c223060373d4116dead9cc53a265de60b9/pyyaml_ft-8.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd48d639cab5ca50ad957b6dd632c7dd3ac02a1abe0e8196a3c24a52f5db3f7a", size = 815755, upload-time = "2025-06-10T15:32:09.435Z" }, - { url = "https://files.pythonhosted.org/packages/05/10/f42c48fa5153204f42eaa945e8d1fd7c10d6296841dcb2447bf7da1be5c4/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:052561b89d5b2a8e1289f326d060e794c21fa068aa11255fe71d65baf18a632e", size = 810403, upload-time = "2025-06-10T15:32:11.051Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/d2/e369064aa51009eb9245399fd8ad2c562bd0bcd392a00be44b2a824ded7c/pyyaml_ft-8.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:3bb4b927929b0cb162fb1605392a321e3333e48ce616cdcfa04a839271373255", size = 835581, upload-time = "2025-06-10T15:32:12.897Z" }, - { url = "https://files.pythonhosted.org/packages/c0/28/26534bed77109632a956977f60d8519049f545abc39215d086e33a61f1f2/pyyaml_ft-8.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:de04cfe9439565e32f178106c51dd6ca61afaa2907d143835d501d84703d3793", size = 171579, upload-time = "2025-06-10T15:32:14.34Z" }, -] - [[package]] name = "pyzmq" version = "27.0.1" @@ -5582,15 +5382,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] -[[package]] -name = "tabulate" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, -] - [[package]] name = "tensorboard" version = "2.20.0" @@ -5621,13 +5412,51 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/c6/825dab04195756cf8ff2e12698f22513b3db2f64925bdd41671bfb33aaa5/tensorboard_data_server-0.7.2-py3-none-manylinux_2_31_x86_64.whl", hash = 
"sha256:ef687163c24185ae9754ed5650eb5bc4d84ff257aabdc33f0cc6f74d8ba54530", size = 6590363, upload-time = "2023-10-23T21:23:35.583Z" }, ] +[[package]] +name = "tensorstore" +version = "0.1.74" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] +dependencies = [ + { name = "ml-dtypes", version = "0.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.13'" }, + { name = "numpy", marker = "python_full_version >= '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3c/b9/ea25aba62c688a87d7d7d9cc5926d602e2f9e84fa72586825486fb180b7e/tensorstore-0.1.74.tar.gz", hash = "sha256:a062875f27283d30ce4959c408c253ecb336fce8e3f9837c064e3d30cda79203", size = 6795605, upload-time = "2025-04-24T15:42:18.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/14/2e6d1cad744af9e9a1a78d881a908a859ad95b61b15de10397069f55fbd8/tensorstore-0.1.74-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:7218722ee5d74e4d01f357917d3b1b7b1d6b1c068aa73e3d801cb3d58fc45116", size = 15334307, upload-time = "2025-04-24T15:41:48.315Z" }, + { url = "https://files.pythonhosted.org/packages/b2/ac/8d572b8c6d689eb50db0252e9d35ee6278a6aed481b64d7e025cf51e32c4/tensorstore-0.1.74-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:a6926554a8633d0210bdba619d3996fff6a6af4214237fbca626e6ddfcc8ea39", size = 13288669, upload-time = "2025-04-24T15:41:50.808Z" }, + { url = "https://files.pythonhosted.org/packages/9d/6c/3e76d614ad70b61670686d91abaa3ddee6b01255bf2b40f050beb15b7970/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d584e468eb4ef8195f5d21a9da4780cf96c6074b87ef219b43a89efce3d503ca", size = 17031720, upload-time = "2025-04-24T15:41:55.092Z" }, + { url = "https://files.pythonhosted.org/packages/31/f3/09d7c3ad7c9517f89b5be9b4460b83333e98dce1c9ab0a52464ded0bab67/tensorstore-0.1.74-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0af2225431d59f8a2bb4db4c1519252f10ee407e6550875d78212d3d34ee743", size = 18378829, upload-time = "2025-04-24T15:41:58.167Z" }, + { url = "https://files.pythonhosted.org/packages/a7/f2/45ece38705280ed9ebf4ccaf084ed1e76e35b1eeec8c510e589978ac8dcd/tensorstore-0.1.74-cp312-cp312-win_amd64.whl", hash = "sha256:4e35f3679873cdc488aae20b9ae2cea4589c7b147a80edb07eb3f09eba47d43d", size = 12432300, upload-time = "2025-04-24T15:42:00.761Z" }, + { url = "https://files.pythonhosted.org/packages/fb/e9/a08c6a6eb7d6b4b26053d4575196a06c6fccf4e89f9bc625f81e7c91bb5d/tensorstore-0.1.74-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:f7d2c80de9ab352ca14aeca798d6650c5670725e6f8eac73f4fcc8f3147ca614", size = 15334469, upload-time = "2025-04-24T15:42:03.731Z" }, + { url = "https://files.pythonhosted.org/packages/9a/a9/64b90c6e66e0b8043e641090144c6614b0c78d9a719b9110d953d13a516d/tensorstore-0.1.74-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ceef7d2dcfd1caf61356f7eeb9a37896b4825b4be2750b00615cf5fb1ae47a8b", size = 13288791, upload-time = "2025-04-24T15:42:06.145Z" }, + { url = "https://files.pythonhosted.org/packages/62/e8/226cfc25d7eac00e783ff2ee4994830c4a42cd8690e207c4a8b93210f3d9/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e71637002a806bc1b0f0f05556d1c33493a43f3ab35f9632b3d48855677d93dc", size = 17031815, upload-time = "2025-04-24T15:42:09.239Z" }, + { url = "https://files.pythonhosted.org/packages/9a/09/dce8a0942d84f6bb039b5ea3e8bc6a479b1a9535cd216b0d42dd03c4f761/tensorstore-0.1.74-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c799edf9000aee68d6676e3d2f73d4e1a56fc817c47e150732f6d3bd2b1ef46d", size = 18378091, upload-time = "2025-04-24T15:42:13.546Z" }, + { url = "https://files.pythonhosted.org/packages/a6/23/5218575d25de9d8debfb3faf290a1e3b9a7b6be9e77ba07ff3a63a0bc899/tensorstore-0.1.74-cp313-cp313-win_amd64.whl", hash = "sha256:5da86437ffa1ee0f0c590c38daa2f4b548890ce66b1f470ac98714cb0eabdbf5", size = 12432635, upload-time = "2025-04-24T15:42:16.275Z" }, +] + [[package]] name = "tensorstore" version = "0.1.76" source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform == 'linux'", +] dependencies = [ - { name = "ml-dtypes" }, - { name = "numpy" }, + { name = "ml-dtypes", version = "0.5.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.13'" }, + { name = "numpy", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ff/ae/947a9f232de7319b664ed8d278e9e0363e9294da73fd422c687ac4eb070e/tensorstore-0.1.76.tar.gz", hash = 
"sha256:ed0d565e7a038a84b1b5b5d9f7397caec200b53941d8889f44b7f63dd6abffe7", size = 6869230, upload-time = "2025-07-02T21:34:03.773Z" } wheels = [ @@ -5718,15 +5547,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/f2/fd673d979185f5dcbac4be7d09461cbb99751554ffb6718d0013af8604cb/tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597", size = 2507568, upload-time = "2025-07-28T15:48:55.456Z" }, ] -[[package]] -name = "toml" -version = "0.10.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/be/ba/1f744cdc819428fc6b5084ec34d9b30660f6f9daaf70eead706e3203ec3c/toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f", size = 22253, upload-time = "2020-11-01T01:40:22.204Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/44/6f/7120676b6d73228c96e17f1f794d8ab046fc910d781c8d151120c3f1569e/toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", size = 16588, upload-time = "2020-11-01T01:40:20.672Z" }, -] - [[package]] name = "torch" version = "2.7.1+cu128" @@ -5822,21 +5642,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/95/d4/af694ef718aedbe95a72760ab9ff7a6a7a44ace2d7f70c27bfeb67c5c503/torchdata-0.11.0-py3-none-any.whl", hash = "sha256:52b940fbbe0e00fb21cabddf528449d1bec5bfb0d0823b7487b15f951658ee33", size = 61968, upload-time = "2025-02-20T22:26:30.666Z" }, ] -[[package]] -name = "torchmetrics" -version = "1.8.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "lightning-utilities" }, - { name = "numpy" }, - { name = "packaging" }, - { name = "torch" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/78/1f/2cd9eb8f3390c3ec4693ac0871913d4b468964b3833638e4091a70817e0a/torchmetrics-1.8.1.tar.gz", hash = 
"sha256:04ca021105871637c5d34d0a286b3ab665a1e3d2b395e561f14188a96e862fdb", size = 580373, upload-time = "2025-08-07T20:44:44.631Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8f/59/5c1c1cb08c494621901cf549a543f87143019fac1e6dd191eb4630bbc8fb/torchmetrics-1.8.1-py3-none-any.whl", hash = "sha256:2437501351e0da3d294c71210ce8139b9c762b5e20604f7a051a725443db8f4b", size = 982961, upload-time = "2025-08-07T20:44:42.608Z" }, -] - [[package]] name = "torchprofile" version = "0.0.4" @@ -5878,8 +5683,14 @@ source = { registry = "https://download.pytorch.org/whl/cu128" } resolution-markers = [ "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux'", - "python_full_version >= '3.13' and sys_platform != 'linux'", - "python_full_version < '3.13' and sys_platform != 'linux'", + "python_full_version >= '3.13' and sys_platform == 'win32'", + "python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version >= '3.13' and sys_platform == 'darwin'", + "python_full_version >= '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'win32'", + "python_full_version < '3.13' and platform_machine != 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", + "python_full_version < '3.13' and sys_platform == 'darwin'", + "python_full_version < '3.13' and platform_machine == 'aarch64' and sys_platform != 'darwin' and sys_platform != 'linux' and sys_platform != 'win32'", ] dependencies = [ { name = "numpy", marker = "platform_machine != 'aarch64' or sys_platform != 'linux'" }, @@ -5895,26 +5706,6 @@ wheels = [ { url = 
"https://download.pytorch.org/whl/cu128/torchvision-0.22.1%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:e5320bb2c9f69636f3dc18abc3291fe8c8e448cb9ef0112510a5413a5af3f8f2" }, ] -[[package]] -name = "torchx" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "docker" }, - { name = "docstring-parser" }, - { name = "filelock" }, - { name = "fsspec" }, - { name = "importlib-metadata" }, - { name = "pyre-extensions" }, - { name = "pyyaml" }, - { name = "tabulate" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ed/a9/e07e64222e59f44eb18b333bc8db942a295c0d9c16f04c8703fe106a8fd5/torchx-0.7.0.tar.gz", hash = "sha256:933b800849c69ddff9feda931f0ae2c4083638eafc73abfdf361158667c68ad6", size = 196142, upload-time = "2024-07-16T22:06:57.222Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/8d/282b884cef8f26b240e1ac8e60513a25a68e3463dc1c68771191a2c8a209/torchx-0.7.0-py3-none-any.whl", hash = "sha256:815c2628c30de1f5938c14c4427cfe280fdf93473b171be906ab7c1ea5971824", size = 256100, upload-time = "2024-07-16T21:16:01.849Z" }, -] - [[package]] name = "tqdm" version = "4.67.1" @@ -5929,13 +5720,13 @@ wheels = [ [[package]] name = "transformer-engine" -version = "2.3.0" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "transformer-engine-cu12" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/5a/5a/60b25ecc3396fc73b07bf9eb2fbf6a3e186efe1e3e86070017ba9d34a4a0/transformer_engine-2.3.0-py3-none-any.whl", hash = "sha256:2a02c2ac29b9781d09975d4cc373df66ed5f0326ff079a2908c3c26d35ac9ee9", size = 486192, upload-time = "2025-05-19T22:08:25.342Z" }, + { url = "https://files.pythonhosted.org/packages/13/e7/b1af90c00f6c63abfada141f25c2ef7ed620d01aa62d8c3a8f74644b2396/transformer_engine-2.5.0-py3-none-any.whl", hash = "sha256:a78a2ad01b1f97bccd740f701287fa43cf6ba37ba5a9b2d8c401d35ecdd0898e", size = 535575, upload-time = 
"2025-07-15T20:48:28.119Z" }, ] [package.optional-dependencies] @@ -5945,7 +5736,7 @@ pytorch = [ [[package]] name = "transformer-engine-cu12" -version = "2.3.0" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "importlib-metadata" }, @@ -5953,18 +5744,19 @@ dependencies = [ { name = "pydantic" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/72/6a/b94e7bb1c61db83beb89f5500cffb6205ea3c27a343bd29b35e3e5e55bbf/transformer_engine_cu12-2.3.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3b3632ef9e4ea94ba2a373e45aa98c2ff4aaa929e5418610d1397c59d0fbc3b8", size = 266059296, upload-time = "2025-05-20T00:53:27.635Z" }, - { url = "https://files.pythonhosted.org/packages/95/30/b4c0741fbe0402aa60f24db81ee50ee213cf068f1b79e2f447543e95f194/transformer_engine_cu12-2.3.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:f4b48957de2318b7009c54bb9442b717a383f6b254a9fc26fac0c67d2d658d52", size = 266398998, upload-time = "2025-05-20T00:54:24.308Z" }, + { url = "https://files.pythonhosted.org/packages/17/ca/c2594e92fa63366947b2e34f21e7b2c29f5aa7c8ed3648356623af6fc580/transformer_engine_cu12-2.5.0-py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:06676d92ee87e207c91d27a1947a880c4da502667f7b5dacabc1413dc5c5a795", size = 274123550, upload-time = "2025-07-15T21:15:27.622Z" }, + { url = "https://files.pythonhosted.org/packages/74/84/ee88068473ff38a7d304c7861ff61290eadf1139dfd2fda56804b34724d3/transformer_engine_cu12-2.5.0-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:d4a63985115bbc399b44a70ef7a8e028cbd53aee6f2a621c11f7c12bd4632095", size = 274534430, upload-time = "2025-07-15T21:16:23.638Z" }, ] [[package]] name = "transformer-engine-torch" -version = "2.3.0" +version = "2.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ + { name = "einops" }, { name = "torch" }, ] -sdist = { url = 
"https://files.pythonhosted.org/packages/1b/8f/d8b39a34cebb91528f118591e622d9ba1d9e52e4ea40cca1252ec335d07c/transformer_engine_torch-2.3.0.tar.gz", hash = "sha256:2b0adb75d7a5bf590cc5452276db6badb0963de99e6c50c7fde6a3a8a7c504e9", size = 165832, upload-time = "2025-05-15T23:10:38.909Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a1/1d/73ec467d20d96e0bb12c3047d0567d41955ab2e18ce3c26cb53451773605/transformer_engine_torch-2.5.0.tar.gz", hash = "sha256:f56e65287a1f082ac5f44b8edd3c024131f51414f44ecffcdd1372e2781cb113", size = 159599, upload-time = "2025-07-17T15:37:07.939Z" } [[package]] name = "transformers" @@ -6066,19 +5858,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b5/00/d631e67a838026495268c2f6884f3711a15a9a2a96cd244fdaea53b823fb/typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76", size = 43906, upload-time = "2025-07-04T13:28:32.743Z" }, ] -[[package]] -name = "typing-inspect" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/dc/74/1789779d91f1961fa9438e9a8710cdae6bd138c80d7303996933d117264a/typing_inspect-0.9.0.tar.gz", hash = "sha256:b23fc42ff6f6ef6954e4852c1fb512cdd18dbea03134f91f856a95ccc9461f78", size = 13825, upload-time = "2023-05-24T20:25:47.612Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl", hash = "sha256:9ee6fc59062311ef8547596ab6b955e1b8aa46242d854bfc78f4f6b0eff35f9f", size = 8827, upload-time = "2023-05-24T20:25:45.287Z" }, -] - [[package]] name = "typing-inspection" version = "0.4.1" @@ -6338,29 +6117,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/32/fa/a4f5c2046385492b2273213ef815bf71a0d4c1943b784fb904e184e30201/watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:af06c863f152005c7592df1d6a7009c836a247c9d8adb78fef8575a5a98699db", size = 623315, upload-time = "2025-06-15T19:06:29.076Z" }, ] -[[package]] -name = "wcwidth" -version = "0.2.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, -] - -[[package]] -name = "webdataset" -version = "1.0.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "braceexpand" }, - { name = "numpy" }, - { name = "pyyaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/5a/3a/68800d92e065cf4750ebecf973b13979c0c929b439e1293012938862038d/webdataset-1.0.2.tar.gz", hash = "sha256:7f0498be827cfa46cc5430a58768a24e2c6a410676a61be1838f53d61afdaab4", size = 80090, upload-time = "2025-06-19T23:26:21.945Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d9/00/aca6beb3658dab4ed3dbb41a78e6e7f31342e0b41d28088f205525751601/webdataset-1.0.2-py3-none-any.whl", hash = "sha256:3dbfced32b25c0d199c6b9787937b6f85742bc3c84f652c846893075c1c082d9", size = 74956, upload-time = "2025-06-19T23:26:20.354Z" }, -] - [[package]] name = "websockets" version = "15.0.1" @@ -6404,12 +6160,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498, upload-time = "2024-11-08T15:52:16.132Z" }, ] -[[package]] -name = "wget" -version = "3.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/47/6a/62e288da7bcda82b935ff0c6cfe542970f04e29c756b0e147251b2fb251f/wget-3.2.zip", hash = "sha256:35e630eca2aa50ce998b9b1a127bb26b30dfee573702782aa982f875e3f16061", size = 10857, upload-time = "2015-10-22T15:26:37.51Z" } - [[package]] name = "wrapt" version = "1.17.3"