From 8240eadfdba9dc785f602a1f980f06021f8205a7 Mon Sep 17 00:00:00 2001 From: SurbhiJainUSC Date: Fri, 3 Apr 2026 19:33:36 +0000 Subject: [PATCH 1/2] Refactor install_extra_deps scripts --- .github/workflows/run_jupyter_notebooks.yml | 2 +- .github/workflows/run_pathways_tests.yml | 2 +- .../workflows/run_tests_against_package.yml | 4 +-- docs/install_maxtext.md | 18 ++++++------ docs/tutorials/inference.md | 2 +- pyproject.toml | 6 ++-- .../maxtext_gpu_dependencies.Dockerfile | 2 +- .../maxtext_tpu_dependencies.Dockerfile | 2 +- .../{github_deps => extra_deps}/__init__.py | 0 .../post_train_base_deps.txt | 0 .../post_train_github_deps.txt} | 0 .../pre_train_github_deps.txt} | 0 .../install_post_train_extra_deps.py} | 26 ++++++++--------- .../install_pre_train_extra_deps.py} | 28 ++++++++----------- src/dependencies/scripts/setup.sh | 6 ++-- src/maxtext/examples/rl_llama3_demo.ipynb | 2 +- .../examples/sft_llama3_demo_tpu.ipynb | 4 +-- src/maxtext/examples/sft_qwen3_demo.ipynb | 2 +- 18 files changed, 49 insertions(+), 57 deletions(-) rename src/dependencies/{github_deps => extra_deps}/__init__.py (100%) rename src/dependencies/{github_deps => extra_deps}/post_train_base_deps.txt (100%) rename src/dependencies/{github_deps/post_train_deps.txt => extra_deps/post_train_github_deps.txt} (100%) rename src/dependencies/{github_deps/pre_train_deps.txt => extra_deps/pre_train_github_deps.txt} (100%) rename src/dependencies/{github_deps/install_post_train_deps.py => scripts/install_post_train_extra_deps.py} (78%) rename src/dependencies/{github_deps/install_pre_train_deps.py => scripts/install_pre_train_extra_deps.py} (70%) diff --git a/.github/workflows/run_jupyter_notebooks.yml b/.github/workflows/run_jupyter_notebooks.yml index a4663a26e0..fe3457b2ed 100644 --- a/.github/workflows/run_jupyter_notebooks.yml +++ b/.github/workflows/run_jupyter_notebooks.yml @@ -72,7 +72,7 @@ jobs: # 2. 
Install MaxText package and all the post training dependencies uv pip install ${maxtext_wheel}[tpu-post-train] --resolution=lowest - install_maxtext_tpu_post_train_extra_deps + install_tpu_post_train_extra_deps python3 -m pip freeze - name: Run Post-Training Notebooks diff --git a/.github/workflows/run_pathways_tests.yml b/.github/workflows/run_pathways_tests.yml index 844ffd62ab..b6776d7ddd 100644 --- a/.github/workflows/run_pathways_tests.yml +++ b/.github/workflows/run_pathways_tests.yml @@ -85,7 +85,7 @@ jobs: source .venv/bin/activate maxtext_wheel=$(ls maxtext-*-py3-none-any.whl 2>/dev/null) uv pip install ${maxtext_wheel}[tpu] --resolution=lowest - uv pip install -r src/dependencies/github_deps/pre_train_deps.txt + install_tpu_pre_train_extra_deps python3 --version python3 -m pip freeze - name: Copy test assets files diff --git a/.github/workflows/run_tests_against_package.yml b/.github/workflows/run_tests_against_package.yml index 1ab4467d8b..4520b7580c 100644 --- a/.github/workflows/run_tests_against_package.yml +++ b/.github/workflows/run_tests_against_package.yml @@ -110,9 +110,9 @@ jobs: echo "Installing ${maxtext_wheel} for ${MAXTEXT_PACKAGE_EXTRA}..." 
uv pip install ${maxtext_wheel}[${MAXTEXT_PACKAGE_EXTRA}] --resolution=lowest
          if [ "${MAXTEXT_PACKAGE_EXTRA}" == "tpu-post-train" ]; then
-            uv pip install -r src/dependencies/github_deps/post_train_base_deps.txt
+            install_tpu_post_train_extra_deps
          else
-            uv pip install -r src/dependencies/github_deps/pre_train_deps.txt
+            install_tpu_pre_train_extra_deps
          fi
          python3 --version
          python3 -m pip freeze
diff --git a/docs/install_maxtext.md b/docs/install_maxtext.md
index 90c7a4c697..da6e91d06d 100644
--- a/docs/install_maxtext.md
+++ b/docs/install_maxtext.md
@@ -41,22 +41,22 @@ source maxtext_venv/bin/activate
 
 # Option 1: Installing maxtext[tpu]
 uv pip install maxtext[tpu]==0.2.1 --resolution=lowest
-install_maxtext_tpu_github_deps
+install_tpu_pre_train_extra_deps
 
 # Option 2: Installing maxtext[cuda12]
 uv pip install maxtext[cuda12]==0.2.1 --resolution=lowest
-install_maxtext_cuda12_github_dep
+install_cuda12_pre_train_extra_deps
 
 # Option 3: Installing maxtext[tpu-post-train]
 uv pip install maxtext[tpu-post-train]==0.2.1 --resolution=lowest
-install_maxtext_tpu_post_train_extra_deps
+install_tpu_post_train_extra_deps
 
 # Option 4: Installing maxtext[runner]
 uv pip install maxtext[runner]==0.2.1 --resolution=lowest
 ```
 
-> **Note:** The `install_maxtext_tpu_github_deps`, `install_maxtext_cuda12_github_dep`, and
-> `install_maxtext_tpu_post_train_extra_deps` commands are temporarily required to install dependencies directly from GitHub
+> **Note:** The `install_tpu_pre_train_extra_deps`, `install_cuda12_pre_train_extra_deps`, and
+> `install_tpu_post_train_extra_deps` commands are temporarily required to install dependencies directly from GitHub
 > that are not yet available on PyPI. As shown above, choose the one that corresponds to your use case.
 
 > **Note:** The maxtext package contains a comprehensive list of all direct and transitive dependencies, with lower bounds, generated by [seed-env](https://github.com/google-ml-infra/actions/tree/main/python_seed_env).
We highly recommend the `--resolution=lowest` flag. It instructs `uv` to install the specific, tested versions of dependencies defined by MaxText, rather than the latest available ones. This ensures a consistent and reproducible environment, which is critical for stable performance and for running benchmarks. @@ -82,15 +82,15 @@ source maxtext_venv/bin/activate # Option 1: Installing .[tpu] uv pip install -e .[tpu] --resolution=lowest -install_maxtext_tpu_github_deps +install_tpu_pre_train_extra_deps # Option 2: Installing .[cuda12] uv pip install -e .[cuda12] --resolution=lowest -install_maxtext_cuda12_github_dep +install_cuda12_pre_train_extra_deps # Option 3: Installing .[tpu-post-train] uv pip install -e .[tpu-post-train] --resolution=lowest -install_maxtext_tpu_post_train_extra_deps +install_tpu_post_train_extra_deps # Option 4: Installing maxtext[runner] uv pip install -e .[runner] --resolution=lowest @@ -110,7 +110,7 @@ Please keep dependencies updated throughout development. This will allow each co To update dependencies, you will follow these general steps: -1. **Modify Base Requirements**: Update the desired dependencies in `base_requirements/requirements.txt` or the hardware-specific files (`base_requirements/tpu-base-requirements.txt`, `base_requirements/gpu-base-requirements.txt`). +1. **Modify Base Requirements**: Update the desired dependencies in `base_requirements/requirements.txt` or the hardware-specific files (`base_requirements/tpu-requirements.txt`, `base_requirements/gpu-requirements.txt`). 2. **Generate New Files**: Run the `seed-env` CLI tool to generate new, fully-pinned requirements files based on your changes. 3. **Update Project Files**: Copy the newly generated files into the `generated_requirements/` directory. 4. **Handle GitHub Dependencies**: Move any dependencies that are installed directly from GitHub from the generated files to `src/dependencies/github_deps/pre_train_deps.txt`. 
diff --git a/docs/tutorials/inference.md b/docs/tutorials/inference.md index adc94cee64..f4e7a8d495 100644 --- a/docs/tutorials/inference.md +++ b/docs/tutorials/inference.md @@ -47,7 +47,7 @@ Required-by: If the plugin is not installed, please run the install post training extra dependencies script again with the following command: ```bash -install_maxtext_tpu_post_train_extra_deps +install_tpu_post_train_extra_deps ``` # Offline Inference diff --git a/pyproject.toml b/pyproject.toml index 92f650980c..24dbd28e1b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,8 +46,8 @@ packages = ["src/maxtext", "src/dependencies"] path = "build_hooks.py" [project.scripts] -install_maxtext_tpu_github_deps = "dependencies.github_deps.install_pre_train_deps:main" -install_maxtext_cuda12_github_deps = "dependencies.github_deps.install_pre_train_deps:main" -install_maxtext_tpu_post_train_extra_deps = "dependencies.github_deps.install_post_train_deps:main" +install_tpu_pre_train_extra_deps = "dependencies.scripts.install_pre_train_extra_deps:main" +install_cuda12_pre_train_extra_deps = "dependencies.scripts.install_pre_train_extra_deps:main" +install_tpu_post_train_extra_deps = "dependencies.scripts.install_post_train_extra_deps:main" build_maxtext_docker_image = "dependencies.scripts.build_maxtext_docker_image:main" upload_maxtext_docker_image = "dependencies.scripts.upload_maxtext_docker_image:main" diff --git a/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile b/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile index b3caf4faa0..b22b334a1c 100644 --- a/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile +++ b/src/dependencies/dockerfiles/maxtext_gpu_dependencies.Dockerfile @@ -50,7 +50,7 @@ ENV MAXTEXT_REPO_ROOT=/deps WORKDIR /deps # Copy setup files and dependency files separately for better caching -COPY ${PACKAGE_DIR}/dependencies/github_deps/ src/dependencies/github_deps/ +COPY ${PACKAGE_DIR}/dependencies/extra_deps/ 
src/dependencies/extra_deps/ COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/ COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/ COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/maxtext/integration/vllm/ diff --git a/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile b/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile index 131fdabaf1..afa4526b34 100644 --- a/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile +++ b/src/dependencies/dockerfiles/maxtext_tpu_dependencies.Dockerfile @@ -47,7 +47,7 @@ ENV MAXTEXT_REPO_ROOT=/deps WORKDIR /deps # Copy setup files and dependency files separately for better caching -COPY ${PACKAGE_DIR}/dependencies/github_deps/ src/dependencies/github_deps/ +COPY ${PACKAGE_DIR}/dependencies/extra_deps/ src/dependencies/extra_deps/ COPY ${PACKAGE_DIR}/dependencies/requirements/ src/dependencies/requirements/ COPY ${PACKAGE_DIR}/dependencies/scripts/ src/dependencies/scripts/ COPY ${PACKAGE_DIR}/maxtext/integration/vllm/ src/maxtext/integration/vllm/ diff --git a/src/dependencies/github_deps/__init__.py b/src/dependencies/extra_deps/__init__.py similarity index 100% rename from src/dependencies/github_deps/__init__.py rename to src/dependencies/extra_deps/__init__.py diff --git a/src/dependencies/github_deps/post_train_base_deps.txt b/src/dependencies/extra_deps/post_train_base_deps.txt similarity index 100% rename from src/dependencies/github_deps/post_train_base_deps.txt rename to src/dependencies/extra_deps/post_train_base_deps.txt diff --git a/src/dependencies/github_deps/post_train_deps.txt b/src/dependencies/extra_deps/post_train_github_deps.txt similarity index 100% rename from src/dependencies/github_deps/post_train_deps.txt rename to src/dependencies/extra_deps/post_train_github_deps.txt diff --git a/src/dependencies/github_deps/pre_train_deps.txt b/src/dependencies/extra_deps/pre_train_github_deps.txt similarity index 100% rename from 
src/dependencies/github_deps/pre_train_deps.txt rename to src/dependencies/extra_deps/pre_train_github_deps.txt diff --git a/src/dependencies/github_deps/install_post_train_deps.py b/src/dependencies/scripts/install_post_train_extra_deps.py similarity index 78% rename from src/dependencies/github_deps/install_post_train_deps.py rename to src/dependencies/scripts/install_post_train_extra_deps.py index fd09cd2109..eac8c158b9 100644 --- a/src/dependencies/github_deps/install_post_train_deps.py +++ b/src/dependencies/scripts/install_post_train_extra_deps.py @@ -14,10 +14,8 @@ """Installs extra dependencies from a requirements file using uv. -This script is designed to be run to install dependencies specified in -'post_train_deps.txt', which is expected to be in the same directory. -It first ensures 'uv' is installed and then uses it to install the packages -listed in the requirements file. +This script is designed to install dependencies specified in 'dependencies/extra_deps/post_train_*.txt'. +It first ensures 'uv' is installed and then uses it to install the packages listed in the requirements file. """ import os @@ -27,18 +25,16 @@ def main(): """ - Installs extra dependencies specified in post_train_deps.txt using uv. - - This script looks for 'post_train_deps.txt' relative to its own location. + Installs extra dependencies specified in 'dependencies/extra_deps/post_train_*.txt' using uv. It executes 'uv pip install -r --resolution=lowest'. 
""" os.environ["VLLM_TARGET_DEVICE"] = "tpu" current_dir = os.path.dirname(os.path.abspath(__file__)) repo_root = os.path.abspath(os.path.join(current_dir, "..", "..")) - extra_deps_path = os.path.join(current_dir, "post_train_deps.txt") - if not os.path.exists(extra_deps_path): - raise FileNotFoundError(f"Dependencies file not found at {extra_deps_path}") + github_deps_path = os.path.join(repo_root, "dependencies", "extra_deps", "post_train_github_deps.txt") + if not os.path.exists(github_deps_path): + raise FileNotFoundError(f"GitHub dependencies file not found at {github_deps_path}") # Check if 'uv' is available in the environment try: @@ -49,14 +45,14 @@ def main(): print(f"Stderr: {e.stderr.decode()}") sys.exit(1) - command = [ + github_deps_command = [ sys.executable, # Use the current Python executable's pip to ensure the correct environment "-m", "uv", "pip", "install", "-r", - str(extra_deps_path), + str(github_deps_path), "--no-deps", ] @@ -72,9 +68,9 @@ def main(): try: # Run the command to install Github dependencies - print(f"Installing extra dependencies: {' '.join(command)}") - _ = subprocess.run(command, check=True, capture_output=True, text=True) - print("Extra dependencies installed successfully!") + print(f"Installing github dependencies: {' '.join(github_deps_command)}") + _ = subprocess.run(github_deps_command, check=True, capture_output=True, text=True) + print("GitHub dependencies installed successfully!") # Run the command to install the MaxText vLLM directory print(f"Installing MaxText vLLM dependency: {' '.join(local_vllm_install_command)}") diff --git a/src/dependencies/github_deps/install_pre_train_deps.py b/src/dependencies/scripts/install_pre_train_extra_deps.py similarity index 70% rename from src/dependencies/github_deps/install_pre_train_deps.py rename to src/dependencies/scripts/install_pre_train_extra_deps.py index d2cbe15ccb..661cc91cab 100644 --- a/src/dependencies/github_deps/install_pre_train_deps.py +++ 
b/src/dependencies/scripts/install_pre_train_extra_deps.py @@ -14,10 +14,8 @@ """Installs extra dependencies from a requirements file using uv. -This script is designed to be run to install dependencies specified in -'pre_train_deps.txt', which is expected to be in the same directory. -It first ensures 'uv' is installed and then uses it to install the packages -listed in the requirements file. +This script is designed to install dependencies specified in 'dependencies/extra_deps/pre_train_*.txt'. +It first ensures 'uv' is installed and then uses it to install the packages listed in the requirements file. """ import os @@ -27,15 +25,14 @@ def main(): """ - Installs extra dependencies specified in pre_train_deps.txt using uv. - - This script looks for 'pre_train_deps.txt' relative to its own location. + Installs extra dependencies specified in 'dependencies/extra_deps/pre_train_*.txt' using uv. It executes 'uv pip install -r --resolution=lowest'. """ current_dir = os.path.dirname(os.path.abspath(__file__)) - extra_deps_path = os.path.join(current_dir, "pre_train_deps.txt") - if not os.path.exists(extra_deps_path): - raise FileNotFoundError(f"Dependencies file not found at {extra_deps_path}") + repo_root = os.path.abspath(os.path.join(current_dir, "..", "..")) + github_deps_path = os.path.join(repo_root, "dependencies", "extra_deps", "pre_train_github_deps.txt") + if not os.path.exists(github_deps_path): + raise FileNotFoundError(f"GitHub dependencies file not found at {github_deps_path}") # Check if 'uv' is available in the environment try: @@ -46,22 +43,21 @@ def main(): print(f"Stderr: {e.stderr.decode()}") sys.exit(1) - command = [ + github_deps_command = [ sys.executable, # Use the current Python executable's pip to ensure the correct environment "-m", "uv", "pip", "install", "-r", - str(extra_deps_path), + str(github_deps_path), "--no-deps", ] try: - # Run the command - print(f"Installing extra dependencies: {' '.join(command)}") - _ = subprocess.run(command, 
check=True, capture_output=True, text=True) - print("Extra dependencies installed successfully!") + print(f"Installing github dependencies: {' '.join(github_deps_command)}") + _ = subprocess.run(github_deps_command, check=True, capture_output=True, text=True) + print("GitHub dependencies installed successfully!") except subprocess.CalledProcessError as e: print("Failed to install extra dependencies.") print(f"Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}.") diff --git a/src/dependencies/scripts/setup.sh b/src/dependencies/scripts/setup.sh index 9b2fe67a1e..10f7d262a5 100644 --- a/src/dependencies/scripts/setup.sh +++ b/src/dependencies/scripts/setup.sh @@ -215,8 +215,8 @@ install_maxtext_with_deps() { dep_name='src/dependencies/requirements/generated_requirements/tpu-requirements.txt' fi echo "Installing requirements from $dep_name" - python3 -m uv pip install --resolution=lowest -r "$dep_name" \ - -r 'src/dependencies/github_deps/pre_train_deps.txt' + python3 -m uv pip install --resolution=lowest -r "$dep_name" + python3 -m src.dependencies.scripts.install_pre_train_extra_deps install_maxtext_package_without_deps } @@ -230,7 +230,7 @@ install_post_training_deps() { dep_name='src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt' echo "Installing requirements from $dep_name" python3 -m uv pip install --resolution=lowest -r "$dep_name" - python3 -m src.dependencies.github_deps.install_post_train_deps + python3 -m src.dependencies.scripts.install_post_train_extra_deps } # ---------- Post-Training workflow installation ---------- diff --git a/src/maxtext/examples/rl_llama3_demo.ipynb b/src/maxtext/examples/rl_llama3_demo.ipynb index 07cf455956..0a58ee725b 100644 --- a/src/maxtext/examples/rl_llama3_demo.ipynb +++ b/src/maxtext/examples/rl_llama3_demo.ipynb @@ -108,7 +108,7 @@ " \n", " # Install MaxText and post-training dependencies\n", " !uv pip install -e .[tpu-post-train] --resolution=lowest\n", - " 
!install_maxtext_tpu_post_train_extra_deps" + " !install_tpu_post_train_extra_deps" ] }, { diff --git a/src/maxtext/examples/sft_llama3_demo_tpu.ipynb b/src/maxtext/examples/sft_llama3_demo_tpu.ipynb index 16012bec19..713c0dfb52 100644 --- a/src/maxtext/examples/sft_llama3_demo_tpu.ipynb +++ b/src/maxtext/examples/sft_llama3_demo_tpu.ipynb @@ -6,7 +6,7 @@ "id": "iBmRfde4Kgv4" }, "source": [ - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/sft_llama3_demo.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/AI-Hypercomputer/maxtext/blob/main/src/maxtext/examples/sft_llama3_demo_tpu.ipynb)\n", "\n", "# Llama3.1-8B-Instruct Supervised Fine-Tuning (SFT) Demo\n" ] @@ -120,7 +120,7 @@ " \n", " # Install MaxText and post-training dependencies\n", " !uv pip install -e .[tpu-post-train] --resolution=lowest\n", - " !install_maxtext_tpu_post_train_extra_deps" + " !install_tpu_post_train_extra_deps" ] }, { diff --git a/src/maxtext/examples/sft_qwen3_demo.ipynb b/src/maxtext/examples/sft_qwen3_demo.ipynb index c4fbf53529..73f4c5b7d0 100644 --- a/src/maxtext/examples/sft_qwen3_demo.ipynb +++ b/src/maxtext/examples/sft_qwen3_demo.ipynb @@ -126,7 +126,7 @@ " \n", " # Install MaxText and post-training dependencies\n", " !uv pip install -e .[tpu-post-train] --resolution=lowest\n", - " !install_maxtext_tpu_post_train_extra_deps" + " !install_tpu_post_train_extra_deps" ] }, { From eb15cdcf7adb6f8128afd4696cf539b2b21e25bc Mon Sep 17 00:00:00 2001 From: Surbhi Jain Date: Thu, 12 Mar 2026 21:55:02 +0000 Subject: [PATCH 2/2] Update generated requirements for JAX 0.9.2 --- docs/install_maxtext.md | 60 ++- .../extra_deps/post_train_overrides.txt | 1 + ...quirements.txt => cuda12-requirements.txt} | 1 + .../base_requirements/requirements.txt | 10 +- .../tpu-post-train-requirements.txt | 7 
+
 ...-requirements.txt => tpu-requirements.txt} |   2 +-
 .../cuda12-requirements.txt                   | 295 ++++++------
 .../tpu-post-train-requirements.txt           | 434 +++++++-----------
 .../tpu-requirements.txt                      | 279 ++++++-----
 .../scripts/install_post_train_extra_deps.py  |  24 +-
 .../scripts/install_pre_train_extra_deps.py   |   6 +-
 .../utils/hf_model_configs.py                 |   1 +
 .../distillation_data_processing.py           |   4 +-
 .../input_pipeline/input_pipeline_utils.py    |  14 +-
 .../unit/sft_data_processing_test.py          |  23 +-
 tests/unit/attention_test.py                  |   1 +
 tests/unit/train_compile_test.py              |   3 +
 17 files changed, 536 insertions(+), 629 deletions(-)
 create mode 100644 src/dependencies/extra_deps/post_train_overrides.txt
 rename src/dependencies/requirements/base_requirements/{gpu-base-requirements.txt => cuda12-requirements.txt} (78%)
 create mode 100644 src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt
 rename src/dependencies/requirements/base_requirements/{tpu-base-requirements.txt => tpu-requirements.txt} (60%)

diff --git a/docs/install_maxtext.md b/docs/install_maxtext.md
index da6e91d06d..a5f19e0a39 100644
--- a/docs/install_maxtext.md
+++ b/docs/install_maxtext.md
@@ -110,11 +110,10 @@ Please keep dependencies updated throughout development. This will allow each co
 
 To update dependencies, you will follow these general steps:
 
-1. **Modify Base Requirements**: Update the desired dependencies in `base_requirements/requirements.txt` or the hardware-specific files (`base_requirements/tpu-requirements.txt`, `base_requirements/gpu-requirements.txt`).
+1. **Modify Base Requirements**: Update the desired dependencies in `src/dependencies/requirements/base_requirements/requirements.txt` or the hardware-specific pre-training files (`base_requirements/tpu-requirements.txt`, `base_requirements/cuda12-requirements.txt`) or post-training requirements.
 2. **Generate New Files**: Run the `seed-env` CLI tool to generate new, fully-pinned requirements files based on your changes.
 3. **Update Project Files**: Copy the newly generated files into the `generated_requirements/` directory.
-4. **Handle GitHub Dependencies**: Move any dependencies that are installed directly from GitHub from the generated files to `src/dependencies/github_deps/pre_train_deps.txt`.
-5. **Verify**: Test the new dependencies to ensure the project installs and runs correctly.
+4. **Verify**: Test the new dependencies to ensure the project installs and runs correctly.
 
 The following sections provide detailed instructions for each step.
 
@@ -125,59 +124,70 @@ First, you need to install the `seed-env` command-line tool by running `pip inst
 
 ## Step 2: Find the JAX Build Commit Hash
 
-The dependency generation process is pinned to a specific nightly build of JAX. You need to find the commit hash for the desired JAX build.
-
-You can find the latest commit hashes in the [JAX `build/` folder](https://github.com/jax-ml/jax/commits/main/build). Choose a recent, successful build and copy its full commit hash.
+The dependency generation process is pinned to a specific nightly build of JAX. You need to find the commit hash for the desired JAX build from the [JAX `build/` folder](https://github.com/jax-ml/jax/commits/main/build).
 
 ## Step 3: Generate the Requirements Files
 
 Next, run the `seed-env` CLI to generate the new requirements files. You will need to do this separately for the TPU and GPU environments. The generated files will be placed in a directory specified by `--output-dir`.
 
-### For TPU
+> **Note:** The current `src/dependencies/requirements/generated_requirements/` in the repository were generated using JAX build commit hash: [e0d2967b50abbefd651d563dbcd7afbcb963d08c](https://github.com/jax-ml/jax/commit/e0d2967b50abbefd651d563dbcd7afbcb963d08c).
+
+### TPU Pre-Training
 
-Run the following command, replacing `<commit_hash>` with the hash you copied in the previous step.
+If you have made changes to TPU pre-training dependencies in `src/dependencies/requirements/base_requirements/tpu-requirements.txt`, you need to regenerate the pinned pre-training requirements in `generated_requirements/` directory. Run the following command, replacing `<commit_hash>` with the hash you copied in the previous step:
 
 ```bash
 seed-env \
-  --local-requirements=src/dependencies/requirements/base_requirements/tpu-base-requirements.txt \
+  --local-requirements=src/dependencies/requirements/base_requirements/tpu-requirements.txt \
   --host-name=MaxText \
   --seed-commit=<commit_hash> \
   --python-version=3.12 \
   --requirements-txt=tpu-requirements.txt \
   --output-dir=generated_tpu_artifacts
+
+# Copy generated requirements to src/dependencies/requirements/generated_requirements
+mv generated_tpu_artifacts/tpu-requirements.txt src/dependencies/requirements/generated_requirements/tpu-requirements.txt
 ```
 
-### For GPU
+### TPU Post-Training
 
-Similarly, run the command for the GPU requirements.
+If you have made changes to the post-training dependencies in `src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt`, you need to regenerate the pinned post-training requirements in `generated_requirements/` directory. Run the following command, replacing `<commit_hash>` with the hash you copied in the previous step:
 
 ```bash
 seed-env \
-  --local-requirements=src/dependencies/requirements/base_requirements/cuda12-base-requirements.txt \
+  --local-requirements=src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt \
   --host-name=MaxText \
   --seed-commit=<commit_hash> \
   --python-version=3.12 \
-  --requirements-txt=cuda12-requirements.txt \
-  --hardware=cuda12 \
-  --output-dir=generated_gpu_artifacts
-```
+  --requirements-txt=tpu-post-train-requirements.txt \
+  --output-dir=generated_tpu_post_train_artifacts
 
-## Step 4: Update Project Files
+# Copy generated requirements to src/dependencies/requirements/generated_requirements
+mv generated_tpu_post_train_artifacts/tpu-post-train-requirements.txt src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt
+```
 
-After generating the new requirements, you need to update the files in the MaxText repository.
+### GPU Pre-Training
 
-1. **Copy the generated files:**
+If you have made changes to the GPU pre-training dependencies in `src/dependencies/requirements/base_requirements/cuda12-requirements.txt`, you need to regenerate the pinned pre-training requirements in `generated_requirements/` directory. Run the following command, replacing `<commit_hash>` with the hash you copied in the previous step:
 
-   - Move `generated_tpu_artifacts/tpu-requirements.txt` to `generated_requirements/tpu-requirements.txt`.
-   - Move `generated_gpu_artifacts/cuda12-requirements.txt` to `generated_requirements/cuda12-requirements.txt`.
+```bash
+seed-env \
+  --local-requirements=src/dependencies/requirements/base_requirements/cuda12-requirements.txt \
+  --host-name=MaxText \
+  --seed-commit=<commit_hash> \
+  --python-version=3.12 \
+  --requirements-txt=cuda12-requirements.txt \
+  --hardware=cuda12 \
+  --output-dir=generated_gpu_artifacts
 
-2. **Update `pre_train_deps.txt` (if necessary):**
-   Currently, MaxText uses a few dependencies, such as `mlperf-logging` and `google-jetstream`, that are installed directly from GitHub source. These are defined in `base_requirements/requirements.txt`, and the `seed-env` tool will carry them over to the generated requirements files.
+# Copy generated requirements to src/dependencies/requirements/generated_requirements
+mv generated_gpu_artifacts/cuda12-requirements.txt src/dependencies/requirements/generated_requirements/cuda12-requirements.txt
+```
 
-## Step 5: Verify the New Dependencies
+## Step 4: Verify the New Dependencies
 
 Finally, test that the new dependencies install correctly and that MaxText runs as expected.
 
-1. **Install MaxText and dependencies**: For instructions on installing MaxText on your VM, please refer to the [official documentation](https://maxtext.readthedocs.io/en/maxtext-v0.2.0/install_maxtext.html#from-source).
+1. **Install MaxText and dependencies**: For instructions on installing MaxText on your VM, please refer to the [official documentation](https://maxtext.readthedocs.io/en/latest/install_maxtext.html#from-source).
 2. **Verify the installation**: Run MaxText tests to ensure everything is working as expected with the newly installed dependencies and there are no regressions.
diff --git a/src/dependencies/extra_deps/post_train_overrides.txt b/src/dependencies/extra_deps/post_train_overrides.txt new file mode 100644 index 0000000000..8fb0539b08 --- /dev/null +++ b/src/dependencies/extra_deps/post_train_overrides.txt @@ -0,0 +1 @@ +google-metrax>=0.2.3 diff --git a/src/dependencies/requirements/base_requirements/gpu-base-requirements.txt b/src/dependencies/requirements/base_requirements/cuda12-requirements.txt similarity index 78% rename from src/dependencies/requirements/base_requirements/gpu-base-requirements.txt rename to src/dependencies/requirements/base_requirements/cuda12-requirements.txt index 4761862e58..f1f143a2b3 100644 --- a/src/dependencies/requirements/base_requirements/gpu-base-requirements.txt +++ b/src/dependencies/requirements/base_requirements/cuda12-requirements.txt @@ -1,2 +1,3 @@ -r requirements.txt +jax[cuda12] transformer-engine[jax] diff --git a/src/dependencies/requirements/base_requirements/requirements.txt b/src/dependencies/requirements/base_requirements/requirements.txt index 61dc53e61d..7970740fcc 100644 --- a/src/dependencies/requirements/base_requirements/requirements.txt +++ b/src/dependencies/requirements/base_requirements/requirements.txt @@ -1,8 +1,9 @@ absl-py aqtp array-record +chex cloud-accelerator-diagnostics -cloud-tpu-diagnostics +cloud-tpu-diagnostics!=1.1.14 datasets drjax flax @@ -13,7 +14,6 @@ google-cloud-mldiagnostics google-cloud-monitoring grain[parquet] huggingface_hub -jax jaxlib jaxtyping jsonlines @@ -24,6 +24,7 @@ numpy omegaconf optax orbax-checkpoint +parameterized pathwaysutils pillow pre-commit @@ -34,15 +35,14 @@ pylint pytest pytype sentencepiece +seqio tensorboard-plugin-profile tensorboardx tensorflow-datasets tensorflow-text tensorflow tiktoken -tokamax +tokamax!=0.1.0 transformers uvloop qwix -google-jetstream @ https://github.com/AI-Hypercomputer/JetStream/archive/29329e8e73820993f77cfc8efe34eb2a73f5de98.zip -mlperf-logging @ 
https://github.com/mlcommons/logging/archive/38ab22670527888c8eb7825a4ece176fcc36a95d.zip diff --git a/src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt b/src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt new file mode 100644 index 0000000000..6ae6025605 --- /dev/null +++ b/src/dependencies/requirements/base_requirements/tpu-post-train-requirements.txt @@ -0,0 +1,7 @@ +-r requirements.txt +google-metrax +ipykernel +jax[tpu] +kagglehub +papermill +perfetto diff --git a/src/dependencies/requirements/base_requirements/tpu-base-requirements.txt b/src/dependencies/requirements/base_requirements/tpu-requirements.txt similarity index 60% rename from src/dependencies/requirements/base_requirements/tpu-base-requirements.txt rename to src/dependencies/requirements/base_requirements/tpu-requirements.txt index 3771eb9143..f2a5efdcf7 100644 --- a/src/dependencies/requirements/base_requirements/tpu-base-requirements.txt +++ b/src/dependencies/requirements/base_requirements/tpu-requirements.txt @@ -1,2 +1,2 @@ -r requirements.txt -google-tunix +jax[tpu] diff --git a/src/dependencies/requirements/generated_requirements/cuda12-requirements.txt b/src/dependencies/requirements/generated_requirements/cuda12-requirements.txt index 364465d935..fb9624d5f1 100644 --- a/src/dependencies/requirements/generated_requirements/cuda12-requirements.txt +++ b/src/dependencies/requirements/generated_requirements/cuda12-requirements.txt @@ -1,259 +1,258 @@ # Generated by seed-env. Do not edit manually. # If you need to modify dependencies, please do so in the host requirements file and run seed-env again. 
-absl-py>=2.3.1 +absl-py>=2.4.0 aiofiles>=25.1.0 aiohappyeyeballs>=2.6.1 -aiohttp>=3.13.2 +aiohttp>=3.13.5 aiosignal>=1.4.0 annotated-doc>=0.0.4 annotated-types>=0.7.0 antlr4-python3-runtime>=4.9.3 -anyio>=4.11.0 +anyio>=4.13.0 aqtp>=0.9.0 array-record>=0.8.3 -astroid>=4.0.2 +astroid>=4.0.4 astunparse>=1.6.3 attrs>=25.4.0 -auditwheel>=6.5.0 -black>=24.10.0 -blobfile>=3.1.0 -build>=1.3.0 -cachetools>=6.2.2 -certifi>=2025.11.12 +auditwheel>=6.6.0 +black>=25.12.0 +build>=1.4.0 +certifi>=2026.2.25 +cffi>=2.0.0 ; platform_python_implementation != 'PyPy' cfgv>=3.5.0 -charset-normalizer>=3.4.4 -cheroot>=11.1.2 +charset-normalizer>=3.4.6 chex>=0.1.91 -click>=8.3.1 +click>=8.3.2 cloud-accelerator-diagnostics>=0.1.1 cloud-tpu-diagnostics>=0.1.5 cloudpickle>=3.1.2 clu>=0.0.12 colorama>=0.4.6 contourpy>=1.3.3 -coverage>=7.12.0 +cryptography>=46.0.6 cycler>=0.12.1 -datasets>=4.4.1 +dataclasses-json>=0.6.7 +datasets>=4.8.4 decorator>=5.2.1 -dill>=0.4.0 +deprecated>=1.3.1 +dill>=0.4.1 distlib>=0.4.0 -dm-tree>=0.1.9 +distro>=1.9.0 +dm-tree>=0.1.10 docstring-parser>=0.17.0 drjax>=0.1.4 editdistance>=0.8.1 -einops>=0.8.1 +einops>=0.8.2 einshape>=1.0 -etils>=1.13.0 -evaluate>=0.4.6 +etils>=1.14.0 execnet>=2.1.2 -fastapi>=0.122.0 -filelock>=3.20.0 -flatbuffers>=25.9.23 -flax>=0.12.1 -fonttools>=4.60.1 +fastapi>=0.135.3 +filelock>=3.20.3 +flatbuffers>=25.12.19 +flax>=0.12.6 +fonttools>=4.62.1 frozenlist>=1.8.0 -fsspec>=2025.10.0 -gast>=0.6.0 -gcsfs>=2025.10.0 -google-api-core>=2.28.1 -google-api-python-client>=2.187.0 -google-auth-httplib2>=0.2.1 -google-auth-oauthlib>=1.2.2 -google-auth>=2.43.0 -google-cloud-aiplatform>=1.128.0 -google-cloud-appengine-logging>=1.7.0 -google-cloud-audit-log>=0.4.0 -google-cloud-bigquery>=3.38.0 -google-cloud-core>=2.5.0 -google-cloud-logging>=3.12.1 -google-cloud-mldiagnostics>=0.5.10 -google-cloud-monitoring>=2.28.0 -google-cloud-resource-manager>=1.15.0 -google-cloud-storage>=3.6.0 -google-crc32c>=1.7.1 -google-genai>=1.52.0 +fsspec>=2026.2.0 
+gast>=0.7.0 +gcsfs>=2026.2.0 +google-api-core>=2.30.2 +google-api-python-client>=2.193.0 +google-auth-httplib2>=0.3.1 +google-auth-oauthlib>=1.3.1 +google-auth>=2.49.1 +google-cloud-aiplatform>=1.145.0 +google-cloud-appengine-logging>=1.9.0 +google-cloud-audit-log>=0.5.0 +google-cloud-bigquery>=3.41.0 +google-cloud-core>=2.5.1 +google-cloud-logging>=3.15.0 +google-cloud-mldiagnostics>=1.0.1 +google-cloud-monitoring>=2.30.0 +google-cloud-resource-manager>=1.17.0 +google-cloud-storage-control>=1.11.0 +google-cloud-storage>=3.10.1 +google-crc32c>=1.8.0 +google-genai>=1.70.0 google-pasta>=0.2.0 -google-resumable-media>=2.8.0 -googleapis-common-protos>=1.72.0 -grain>=0.2.15 -grpc-google-iam-v1>=0.14.3 -grpcio-status>=1.71.2 -grpcio>=1.76.0 +google-resumable-media>=2.8.2 +googleapis-common-protos>=1.74.0 +grain>=0.2.16 +grpc-google-iam-v1>=0.14.4 +grpcio-status>=1.78.0 +grpcio>=1.78.0 gviz-api>=1.10.0 h11>=0.16.0 -h5py>=3.15.1 -hf-xet>=1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' +h5py>=3.14.0 +hf-xet>=1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' httpcore>=1.0.9 -httplib2>=0.31.0 +httplib2>=0.31.2 httpx>=0.28.1 -huggingface-hub>=0.36.0 -humanize>=4.14.0 +huggingface-hub>=1.9.0 +humanize>=4.15.0 hypothesis>=6.142.1 -identify>=2.6.15 +identify>=2.6.18 idna>=3.11 -immutabledict>=4.2.2 +immutabledict>=4.3.1 importlab>=0.8.1 -importlib-metadata>=8.7.0 -importlib-resources>=6.5.2 +importlib-metadata>=9.0.0 iniconfig>=2.3.0 -isort>=7.0.0 -jaraco-functools>=4.3.0 -jax-cuda12-pjrt>=0.8.1 ; sys_platform == 'linux' -jax-cuda12-plugin>=0.8.1 ; sys_platform == 'linux' -jax>=0.8.1 -jaxlib>=0.8.1 -jaxtyping>=0.3.3 +isort>=8.0.1 +jax-cuda12-pjrt>=0.9.2 +jax-cuda12-plugin>=0.9.2 +jax>=0.9.2 +jaxlib>=0.9.2 +jaxtyping>=0.3.9 jinja2>=3.1.6 -joblib>=1.5.2 jsonlines>=4.0.0 -keras>=3.12.0 
-kiwisolver>=1.4.9 +keras>=3.13.2 +kiwisolver>=1.5.0 +latex2sympy2-extended>=1.11.0 libclang>=18.1.1 libcst>=1.8.6 -lxml>=6.0.2 +libtpu>=0.0.37 ; platform_machine == 'x86_64' and sys_platform == 'linux' markdown-it-py>=4.0.0 -markdown>=3.10 +markdown>=3.10.2 markupsafe>=3.0.3 -matplotlib>=3.10.7 +marshmallow>=3.26.2 +math-verify>=0.9.0 +matplotlib>=3.10.8 mccabe>=0.7.0 mdurl>=0.1.2 ml-collections>=1.1.0 ml-dtypes>=0.5.4 -ml-goodput-measurement>=0.0.15 -more-itertools>=10.8.0 +ml-goodput-measurement>=0.0.16 mpmath>=1.3.0 msgpack>=1.1.2 msgspec>=0.20.0 -multidict>=6.7.0 -multiprocess>=0.70.18 +multidict>=6.7.1 +multiprocess>=0.70.19 mypy-extensions>=1.1.0 namex>=0.1.0 -nest-asyncio>=1.6.0 -networkx>=3.6 +networkx>=3.6.1 ninja>=1.13.0 -nltk>=3.9.2 -nodeenv>=1.9.1 -numpy-typing-compat>=20250818.2.0 +nodeenv>=1.10.0 +numpy-typing-compat>=20251206.2.0 numpy>=2.0.2 -nvidia-cublas-cu12>=12.9.1.4 ; sys_platform == 'linux' +nvidia-cublas-cu12>=12.9.2.10 ; sys_platform == 'linux' +nvidia-cuda-cccl-cu12>=12.9.27 ; sys_platform == 'linux' +nvidia-cuda-cccl>=13.2.27 nvidia-cuda-cupti-cu12>=12.9.79 ; sys_platform == 'linux' nvidia-cuda-nvcc-cu12>=12.9.86 ; sys_platform == 'linux' nvidia-cuda-nvrtc-cu12>=12.9.86 ; sys_platform == 'linux' nvidia-cuda-runtime-cu12>=12.9.79 ; sys_platform == 'linux' -nvidia-cudnn-cu12>=9.16.0.29 ; sys_platform == 'linux' +nvidia-cudnn-cu12>=9.20.0.48 ; sys_platform == 'linux' nvidia-cufft-cu12>=11.4.1.4 ; sys_platform == 'linux' nvidia-cusolver-cu12>=11.7.5.82 ; sys_platform == 'linux' nvidia-cusparse-cu12>=12.5.10.65 ; sys_platform == 'linux' -nvidia-nccl-cu12>=2.28.9 ; sys_platform == 'linux' +nvidia-nccl-cu12>=2.29.7 ; sys_platform == 'linux' nvidia-nvjitlink-cu12>=12.9.86 ; sys_platform == 'linux' -nvidia-nvshmem-cu12>=3.4.5 ; sys_platform == 'linux' +nvidia-nvshmem-cu12>=3.6.5 ; sys_platform == 'linux' oauthlib>=3.3.1 omegaconf>=2.3.0 -opentelemetry-api>=1.38.0 +opentelemetry-api>=1.16.0 opt-einsum>=3.4.0 -optax>=0.2.6 -optree>=0.18.0 
-optype>=0.14.0 +optax>=0.2.8 +optree>=0.19.0 +optype>=0.17.0 orbax-checkpoint>=0.11.33 -packaging>=25.0 -pandas>=2.3.3 +orbax-export>=0.0.8 +packaging>=26.0 +pandas>=3.0.2 parameterized>=0.9.0 -pathspec>=0.12.1 -pathwaysutils>=0.1.3 -pillow>=12.0.0 -platformdirs>=4.5.0 +pathspec>=1.0.4 +pathwaysutils>=0.1.6 +pillow>=12.1.1 +platformdirs>=4.9.4 pluggy>=1.6.0 portpicker>=1.6.0 -pre-commit>=4.5.0 -prometheus-client>=0.23.1 +pre-commit>=4.5.1 promise>=2.3 propcache>=0.4.1 -proto-plus>=1.26.1 -protobuf>=5.29.5 -psutil>=7.1.3 -pyarrow>=22.0.0 +proto-plus>=1.27.2 +protobuf>=6.33.6 +psutil>=7.2.2 +pyarrow>=23.0.1 pyasn1-modules>=0.4.2 -pyasn1>=0.6.1 +pyasn1>=0.6.3 pycnite>=2024.7.31 -pycryptodomex>=3.23.0 +pycparser>=3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy' pydantic-core>=2.41.5 pydantic>=2.12.5 pydot>=4.0.1 pyelftools>=0.32 pyglove>=0.4.5 pygments>=2.19.2 -pyink>=24.10.1 -pylint>=4.0.3 -pyparsing>=3.2.5 +pyink>=25.12.0 +pylint>=4.0.5 +pyparsing>=3.3.2 pyproject-hooks>=1.2.0 pytest-xdist>=3.8.0 pytest>=8.4.2 python-dateutil>=2.9.0.post0 +pytokens>=0.4.1 pytype>=2024.10.11 -pytz>=2025.2 pyyaml>=6.0.3 -qwix>=0.1.4 -regex>=2025.11.3 +qwix>=0.1.5 +regex>=2026.3.32 requests-oauthlib>=2.0.0 requests>=2.32.5 -rich>=14.2.0 -rsa>=4.9.1 +rich>=14.3.3 safetensors>=0.7.0 -scipy-stubs>=1.16.3.0 -scipy>=1.16.3 +scipy-stubs>=1.17.1.2 +scipy>=1.17.1 sentencepiece>=0.2.1 seqio>=0.0.20 -setuptools>=80.9.0 -shapely>=2.1.2 -shortuuid>=1.0.13 -simple-parsing>=0.1.7 +setuptools>=82.0.1 +shellingham>=1.5.4 +simple-parsing>=0.1.8 simplejson>=3.20.2 six>=1.17.0 sniffio>=1.3.1 sortedcontainers>=2.4.0 -starlette>=0.50.0 +starlette>=1.0.0 sympy>=1.14.0 -tabulate>=0.9.0 -tenacity>=9.1.2 +tabulate>=0.10.0 +tenacity>=9.1.4 tensorboard-data-server>=0.7.2 tensorboard-plugin-profile>=2.13.0 -tensorboard>=2.19.0 -tensorboardx>=2.6.4 +tensorboard>=2.20.0 +tensorboardx>=2.6.5 tensorflow-datasets>=4.9.9 -tensorflow-metadata>=1.17.2 -tensorflow-text>=2.19.0 
-tensorflow>=2.19.1 -tensorstore>=0.1.79 -termcolor>=3.2.0 +tensorflow-metadata>=1.17.3 +tensorflow-text>=2.20.1 +tensorflow>=2.20.0 +tensorstore>=0.1.82 +termcolor>=3.3.0 tiktoken>=0.12.0 -tokamax>=0.0.8 -tokenizers>=0.22.1 +tokamax>=0.0.12 +tokenizers>=0.22.2 toml>=0.10.2 -tomlkit>=0.13.3 +tomlkit>=0.14.0 toolz>=1.1.0 -tqdm>=4.67.1 -transformer-engine-cu12>=2.9.0 -transformer-engine-jax>=2.9.0 -transformer-engine>=2.9.0 -transformers>=4.57.3 +tqdm>=4.67.3 +transformer-engine-cu12>=2.13.0 +transformer-engine-jax>=2.13.0 +transformer-engine>=2.13.0 +transformers>=5.5.0 treescope>=0.1.10 typeguard>=2.13.3 +typer>=0.24.1 typing-extensions>=4.15.0 +typing-inspect>=0.9.0 typing-inspection>=0.4.2 -tzdata>=2025.2 +tzdata>=2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32' uritemplate>=4.2.0 -urllib3>=2.5.0 -uvicorn>=0.38.0 -uvloop>=0.19.0 -virtualenv>=20.35.4 +urllib3>=2.6.3 +uvicorn>=0.43.0 +uvloop>=0.22.1 +virtualenv>=20.36.1 wadler-lindig>=0.1.7 -websockets>=15.0.1 -werkzeug>=3.1.3 -wheel>=0.45.1 -wrapt>=2.0.1 -xprof>=2.21.1 +websockets>=16.0 +werkzeug>=3.1.8 +wheel>=0.46.3 +wrapt>=2.1.2 xxhash>=3.6.0 -yarl>=1.22.0 +yarl>=1.23.0 zipp>=3.23.0 -zstandard>=0.25.0 +zstandard>=0.25.0 \ No newline at end of file diff --git a/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt b/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt index 60dcb1eb86..833ac355b7 100644 --- a/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt +++ b/src/dependencies/requirements/generated_requirements/tpu-post-train-requirements.txt @@ -1,399 +1,275 @@ -absl-py>=2.3.1 +# Generated by seed-env. Do not edit manually. +# If you need to modify dependencies, please do so in the host requirements file and run seed-env again. 
+ +absl-py>=2.4.0 aiofiles>=25.1.0 aiohappyeyeballs>=2.6.1 -aiohttp>=3.13.3 -aiohttp-cors>=0.8.1 +aiohttp>=3.13.5 aiosignal>=1.4.0 annotated-doc>=0.0.4 annotated-types>=0.7.0 -anthropic>=0.84.0 antlr4-python3-runtime>=4.9.3 -anyio>=4.11.0 +anyio>=4.13.0 +appnope>=0.1.4 ; sys_platform == 'darwin' aqtp>=0.9.0 -array_record>=0.8.3 +array-record>=0.8.3 +astroid>=4.0.4 asttokens>=3.0.1 -astor>=0.8.1 -astroid>=4.0.2 astunparse>=1.6.3 attrs>=25.4.0 -auditwheel>=6.5.0 -black>=24.10.0 -blake3>=1.0.8 -blobfile>=3.1.0 -boto3>=1.42.56 -botocore>=1.42.56 -build>=1.3.0 -cachetools>=6.2.2 -cbor2>=5.8.0 +auditwheel>=6.6.0 +black>=25.12.0 +build>=1.4.0 certifi>=2026.2.25 -cffi>=2.0.0 +cffi>=2.0.0 ; implementation_name == 'pypy' or platform_python_implementation != 'PyPy' cfgv>=3.5.0 -charset-normalizer>=3.4.4 -cheroot>=11.1.2 +charset-normalizer>=3.4.6 chex>=0.1.91 click>=8.3.1 cloud-accelerator-diagnostics>=0.1.1 cloud-tpu-diagnostics>=0.1.5 cloudpickle>=3.1.2 clu>=0.0.12 -cmake>=4.2.1 colorama>=0.4.6 -colorful>=0.5.8 comm>=0.2.3 -compressed-tensors>=0.13.0 contourpy>=1.3.3 -coverage>=7.12.0 -cryptography>=46.0.5 +cryptography>=46.0.6 cycler>=0.12.1 -dacite>=1.9.2 dataclasses-json>=0.6.7 -datasets>=4.6.0 +datasets>=4.8.4 debugpy>=1.8.20 decorator>=5.2.1 -depyf>=0.20.0 -dill>=0.4.0 -diskcache>=5.6.3 +dill>=0.4.1 distlib>=0.4.0 distro>=1.9.0 -dm-tree>=0.1.9 -dnspython>=2.8.0 -docstring_parser>=0.17.0 +dm-tree>=0.1.10 +docstring-parser>=0.17.0 drjax>=0.1.4 editdistance>=0.8.1 -einops>=0.8.1 +einops>=0.8.2 einshape>=1.0 -email-validator>=2.3.0 entrypoints>=0.4 -etils>=1.13.0 -evaluate>=0.4.6 +etils>=1.14.0 execnet>=2.1.2 executing>=2.2.1 -fastapi>=0.122.0 -fastapi-cli>=0.0.24 -fastapi-cloud-cli>=0.13.0 -fastar>=0.8.0 +fastapi>=0.135.3 fastjsonschema>=2.21.2 -filelock>=3.20.0 -flatbuffers>=25.9.23 -flax>=0.12.4 -fonttools>=4.60.1 +filelock>=3.20.3 +flatbuffers>=25.12.19 +flax>=0.12.6 +fonttools>=4.62.1 frozenlist>=1.8.0 -fsspec>=2026.1.0 -gast>=0.6.0 -gcsfs>=2026.1.0 -gguf>=0.17.1 
-google-api-core>=2.28.1 -google-api-python-client>=2.187.0 -google-auth>=2.43.0 -google-auth-httplib2>=0.2.1 -google-auth-oauthlib>=1.2.2 -google-cloud-aiplatform>=1.128.0 -google-cloud-appengine-logging>=1.7.0 -google-cloud-audit-log>=0.4.0 -google-cloud-bigquery>=3.38.0 -google-cloud-core>=2.5.0 -google-cloud-logging>=3.12.1 -google-cloud-mldiagnostics>=0.5.10 -google-cloud-monitoring>=2.28.0 -google-cloud-resource-manager>=1.15.0 -google-cloud-storage>=3.9.0 -google-cloud-storage-control>=1.10.0 -google-crc32c>=1.7.1 -google-genai>=1.52.0 +fsspec>=2026.2.0 +gast>=0.7.0 +gcsfs>=2026.2.0 +google-api-core>=2.30.2 +google-api-python-client>=2.193.0 +google-auth-httplib2>=0.3.1 +google-auth-oauthlib>=1.3.1 +google-auth>=2.49.1 +google-cloud-aiplatform>=1.145.0 +google-cloud-appengine-logging>=1.9.0 +google-cloud-audit-log>=0.5.0 +google-cloud-bigquery>=3.41.0 +google-cloud-core>=2.5.1 +google-cloud-logging>=3.15.0 +google-cloud-mldiagnostics>=1.0.1 +google-cloud-monitoring>=2.30.0 +google-cloud-resource-manager>=1.17.0 +google-cloud-storage-control>=1.11.0 +google-cloud-storage>=3.10.1 +google-crc32c>=1.8.0 +google-genai>=1.70.0 +google-metrax>=0.1.3 google-pasta>=0.2.0 -google-resumable-media>=2.8.0 -google_metrax>=0.2.4 -googleapis-common-protos>=1.72.0 -grain>=0.2.15 -grpc-google-iam-v1>=0.14.3 +google-resumable-media>=2.8.2 +googleapis-common-protos>=1.74.0 +grain>=0.2.16 +grpc-google-iam-v1>=0.14.4 +grpcio-status>=1.78.0 grpcio>=1.78.0 -grpcio-reflection>=1.71.0 -grpcio-status>=1.71.2 -gspread>=6.2.1 gviz-api>=1.10.0 h11>=0.16.0 -h5py>=3.15.1 -hf-xet>=1.2.0 -hf_transfer>=0.1.9 +h5py>=3.14.0 +hf-xet>=1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' httpcore>=1.0.9 -httplib2>=0.31.0 -httptools>=0.7.1 +httplib2>=0.31.2 httpx>=0.28.1 -httpx-sse>=0.4.3 -huggingface-hub>=0.36.0 -humanize>=4.14.0 +huggingface-hub>=1.9.0 +humanize>=4.15.0 
hypothesis>=6.142.1 -identify>=2.6.15 +identify>=2.6.18 idna>=3.11 -ijson>=3.5.0 -immutabledict>=4.2.2 +immutabledict>=4.3.1 importlab>=0.8.1 -importlib_metadata>=8.7.0 -importlib_resources>=6.5.2 +importlib-metadata>=8.7.1 iniconfig>=2.3.0 -interegular>=0.3.3 ipykernel>=7.2.0 -ipython>=9.10.0 -ipython_pygments_lexers>=1.1.1 -ipywidgets>=8.1.8 -isort>=7.0.0 -jaraco.classes>=3.4.0 -jaraco.context>=6.1.0 -jaraco.functools>=4.3.0 -jax>=0.8.3 -jaxlib>=0.8.3 -jaxtyping>=0.3.3 +ipython-pygments-lexers>=1.1.1 +ipython>=9.12.0 +isort>=8.0.1 +jax>=0.9.2 +jaxlib>=0.9.2 +jaxtyping>=0.3.9 jedi>=0.19.2 -jeepney>=0.9.0 -Jinja2>=3.1.6 -jiter>=0.13.0 -jmespath>=1.1.0 -joblib>=1.5.2 +jinja2>=3.1.6 jsonlines>=4.0.0 -jsonschema>=4.26.0 jsonschema-specifications>=2025.9.1 -jupyter_client>=8.8.0 -jupyter_core>=5.9.1 -jupyterlab_widgets>=3.0.16 -kagglehub>=0.3.13 -keras>=3.12.0 -keyring>=25.7.0 -keyrings.google-artifactregistry-auth>=1.1.2 -kiwisolver>=1.4.9 -lark>=1.2.2 -latex2sympy2_extended>=1.11.0 +jsonschema>=4.26.0 +jupyter-client>=8.8.0 +jupyter-core>=5.9.1 +kagglehub>=1.0.0 +kagglesdk>=0.1.16 +keras>=3.13.2 +kiwisolver>=1.5.0 +latex2sympy2-extended>=1.11.0 libclang>=18.1.1 libcst>=1.8.6 -libtpu>=0.0.32 -llguidance>=1.3.0 -llvmlite>=0.45.1 -lm-format-enforcer>=0.11.3 -loguru>=0.7.3 -lxml>=6.0.2 -Markdown>=3.10 +libtpu>=0.0.37 ; platform_machine == 'x86_64' and sys_platform == 'linux' markdown-it-py>=4.0.0 -MarkupSafe>=3.0.3 +markdown>=3.10.2 +markupsafe>=3.0.3 marshmallow>=3.26.2 math-verify>=0.9.0 -matplotlib>=3.10.7 matplotlib-inline>=0.2.1 +matplotlib>=3.10.8 mccabe>=0.7.0 -mcp>=1.26.0 mdurl>=0.1.2 -mistral_common>=1.9.1 -ml_collections>=1.1.0 -ml_dtypes>=0.5.4 -ml_goodput_measurement>=0.0.15 -model-hosting-container-standards>=0.1.13 -more-itertools>=10.8.0 +ml-collections>=1.1.0 +ml-dtypes>=0.5.4 +ml-goodput-measurement>=0.0.16 mpmath>=1.3.0 msgpack>=1.1.2 msgspec>=0.20.0 -multidict>=6.7.0 -multiprocess>=0.70.18 -mypy_extensions>=1.1.0 +multidict>=6.7.1 
+multiprocess>=0.70.19 +mypy-extensions>=1.1.0 namex>=0.1.0 nbclient>=0.10.4 nbformat>=5.10.4 nest-asyncio>=1.6.0 -networkx>=3.6 +networkx>=3.6.1 ninja>=1.13.0 -nixl>=0.3.0 -nltk>=3.9.2 -nodeenv>=1.9.1 -numba>=0.62.1 -numpy>=2.2.6 -numpy-typing-compat>=20250818.2.2 -nvidia-cublas-cu12>=12.8.4.1 -nvidia-cuda-cupti-cu12>=12.8.90 -nvidia-cuda-nvrtc-cu12>=12.8.93 -nvidia-cuda-runtime-cu12>=12.8.90 -nvidia-cudnn-cu12>=9.10.2.21 -nvidia-cufft-cu12>=11.3.3.83 -nvidia-cufile-cu12>=1.13.1.3 -nvidia-curand-cu12>=10.3.9.90 -nvidia-cusolver-cu12>=11.7.3.90 -nvidia-cusparse-cu12>=12.5.8.93 -nvidia-cusparselt-cu12>=0.7.1 -nvidia-nccl-cu12>=2.27.5 -nvidia-nvjitlink-cu12>=12.8.93 -nvidia-nvshmem-cu12>=3.3.20 -nvidia-nvtx-cu12>=12.8.90 +nodeenv>=1.10.0 +numpy-typing-compat>=20251206.2.0 +numpy>=2.0.2 +nvidia-cuda-cccl>=13.2.27 oauthlib>=3.3.1 omegaconf>=2.3.0 -openai>=2.24.0 -openai-harmony>=0.0.8 -opencensus>=0.11.4 -opencensus-context>=0.1.3 -opencv-python-headless>=4.13.0.92 -opentelemetry-api>=1.39.1 -opentelemetry-exporter-otlp>=1.39.1 -opentelemetry-exporter-otlp-proto-common>=1.39.1 -opentelemetry-exporter-otlp-proto-grpc>=1.39.1 -opentelemetry-exporter-otlp-proto-http>=1.39.1 -opentelemetry-exporter-prometheus>=0.60b1 -opentelemetry-proto>=1.39.1 -opentelemetry-sdk>=1.39.1 -opentelemetry-semantic-conventions>=0.60b1 -opentelemetry-semantic-conventions-ai>=0.4.15 -opt_einsum>=3.4.0 -optax>=0.2.6 -optree>=0.18.0 -optype>=0.14.0 -orbax-checkpoint>=0.11.28 +opentelemetry-api>=1.40.0 +opt-einsum>=3.4.0 +optax>=0.2.8 +optree>=0.19.0 +optype>=0.17.0 +orbax-checkpoint>=0.11.33 orbax-export>=0.0.8 -outlines_core>=0.2.11 packaging>=26.0 -pandas>=2.3.3 +pandas>=3.0.2 papermill>=2.7.0 parameterized>=0.9.0 parso>=0.8.6 -partial-json-parser>=0.2.1.1.post7 -pathspec>=0.12.1 -pathwaysutils>=0.1.4 +pathspec>=1.0.4 +pathwaysutils>=0.1.6 perfetto>=0.16.0 -pexpect>=4.9.0 -pillow>=12.0.0 -platformdirs>=4.9.2 +pexpect>=4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' 
+pillow>=12.1.1 +platformdirs>=4.9.4 pluggy>=1.6.0 portpicker>=1.6.0 -pre_commit>=4.5.0 -prometheus-fastapi-instrumentator>=7.1.0 -prometheus_client>=0.23.1 +pre-commit>=4.5.1 promise>=2.3 -prompt_toolkit>=3.0.52 +prompt-toolkit>=3.0.52 propcache>=0.4.1 -proto-plus>=1.26.1 -protobuf>=5.29.6 +proto-plus>=1.27.2 +protobuf>=6.33.6 psutil>=7.2.2 -ptyprocess>=0.7.0 -pure_eval>=0.2.3 -py-cpuinfo>=9.0.0 -py-spy>=0.4.1 -pyarrow>=22.0.0 -pyasn1>=0.6.1 -pyasn1_modules>=0.4.2 -pybase64>=1.4.3 +ptyprocess>=0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32' +pure-eval>=0.2.3 +pyarrow>=23.0.1 +pyasn1-modules>=0.4.2 +pyasn1>=0.6.3 pycnite>=2024.7.31 -pycountry>=26.2.16 -pycparser>=3.0 -pycryptodomex>=3.23.0 +pycparser>=3.0 ; (implementation_name != 'PyPy' and platform_python_implementation != 'PyPy') or (implementation_name == 'pypy' and platform_python_implementation == 'PyPy') +pydantic-core>=2.41.5 pydantic>=2.12.5 -pydantic-extra-types>=2.11.0 -pydantic-settings>=2.13.1 -pydantic_core>=2.41.5 pydot>=4.0.1 pyelftools>=0.32 pyglove>=0.4.5 -Pygments>=2.19.2 -pyink>=24.10.1 -PyJWT>=2.11.0 -pylatexenc>=2.10 -pylint>=4.0.3 -pyparsing>=3.2.5 -pyproject_hooks>=1.2.0 -pytest>=8.4.2 -pytest-mock>=3.15.1 +pygments>=2.19.2 +pyink>=25.12.0 +pylint>=4.0.5 +pyparsing>=3.3.2 +pyproject-hooks>=1.2.0 pytest-xdist>=3.8.0 +pytest>=8.4.2 python-dateutil>=2.9.0.post0 -python-dotenv>=1.2.1 -python-json-logger>=4.0.0 -python-multipart>=0.0.22 +pytokens>=0.4.1 pytype>=2024.10.11 -pytz>=2025.2 -PyYAML>=6.0.3 +pyyaml>=6.0.3 pyzmq>=27.1.0 -qwix>=0.1.4 -ray>=2.54.0 +qwix>=0.1.5 referencing>=0.37.0 -regex>=2025.11.3 -requests>=2.32.5 +regex>=2026.3.32 requests-oauthlib>=2.0.0 -rich>=14.2.0 -rich-toolkit>=0.19.7 -rignore>=0.7.6 +requests>=2.32.5 +rich>=14.3.3 rpds-py>=0.30.0 -rsa>=4.9.1 -runai-model-streamer>=0.15.4 -runai-model-streamer-gcs>=0.15.4 -runai-model-streamer-s3>=0.15.4 -s3transfer>=0.16.0 safetensors>=0.7.0 -scipy>=1.16.3 -scipy-stubs>=1.16.3.0 -SecretStorage>=3.5.0 
+scipy-stubs>=1.17.1.2 +scipy>=1.17.1 sentencepiece>=0.2.1 -sentry-sdk>=2.53.0 seqio>=0.0.20 -setproctitle>=1.3.7 -setuptools>=78.1.0 -setuptools-scm>=9.2.2 -shapely>=2.1.2 +setuptools>=82.0.1 shellingham>=1.5.4 -shortuuid>=1.0.13 -simple-parsing>=0.1.7 +simple-parsing>=0.1.8 simplejson>=3.20.2 six>=1.17.0 -smart_open>=7.5.1 sniffio>=1.3.1 sortedcontainers>=2.4.0 -sse-starlette>=3.2.0 -starlette>=0.50.0 stack-data>=0.6.3 -supervisor>=4.3.0 +starlette>=1.0.0 sympy>=1.14.0 -tabulate>=0.9.0 +tabulate>=0.10.0 tenacity>=9.1.4 -tensorboard>=2.20.0 tensorboard-data-server>=0.7.2 tensorboard-plugin-profile>=2.13.0 -tensorboardX>=2.6.4 -tensorflow>=2.20.0 +tensorboard>=2.20.0 +tensorboardx>=2.6.5 tensorflow-datasets>=4.9.9 -tensorflow-metadata>=1.17.2 -tensorflow-text>=2.20.0 -tensorstore>=0.1.79 -termcolor>=3.2.0 +tensorflow-metadata>=1.17.3 +tensorflow-text>=2.20.1 +tensorflow>=2.20.0 +tensorstore>=0.1.82 +termcolor>=3.3.0 tiktoken>=0.12.0 -tokamax>=0.0.8 -tokenizers>=0.22.1 +tokamax>=0.0.12 +tokenizers>=0.22.2 toml>=0.10.2 -tomlkit>=0.13.3 +tomlkit>=0.14.0 toolz>=1.1.0 -torch>=2.9.0 -torchax>=0.0.11 -torchvision>=0.24.0 -tornado>=6.5.4 -tpu-info>=0.7.1 +tornado>=6.5.5 tqdm>=4.67.3 traitlets>=5.14.3 -transformers>=4.57.1 +transformers>=5.5.0 treescope>=0.1.10 -triton>=3.5.0 typeguard>=2.13.3 typer>=0.24.1 +typing-extensions>=4.15.0 typing-inspect>=0.9.0 typing-inspection>=0.4.2 -typing_extensions>=4.15.0 -tzdata>=2025.2 +tzdata>=2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32' uritemplate>=4.2.0 -urllib3>=2.5.0 -uv>=0.10.6 -uvicorn>=0.38.0 +urllib3>=2.6.3 +uvicorn>=0.42.0 uvloop>=0.22.1 -virtualenv>=20.35.4 -wadler_lindig>=0.1.7 -watchfiles>=1.1.1 +virtualenv>=20.36.1 +wadler-lindig>=0.1.7 wcwidth>=0.6.0 -websockets>=15.0.1 -Werkzeug>=3.1.3 +websockets>=16.0 +werkzeug>=3.1.8 wheel>=0.46.3 -widgetsnbextension>=4.0.15 -wrapt>=2.0.1 -xgrammar>=0.1.29 -xprof>=2.21.1 +wrapt>=2.1.2 xxhash>=3.6.0 -yapf>=0.43.0 -yarl>=1.22.0 +yarl>=1.23.0 zipp>=3.23.0 
-zstandard>=0.25.0 +zstandard>=0.25.0 \ No newline at end of file diff --git a/src/dependencies/requirements/generated_requirements/tpu-requirements.txt b/src/dependencies/requirements/generated_requirements/tpu-requirements.txt index 08da4a3ab7..d785ed591b 100644 --- a/src/dependencies/requirements/generated_requirements/tpu-requirements.txt +++ b/src/dependencies/requirements/generated_requirements/tpu-requirements.txt @@ -1,29 +1,27 @@ # Generated by seed-env. Do not edit manually. # If you need to modify dependencies, please do so in the host requirements file and run seed-env again. -absl-py>=2.3.1 +absl-py>=2.4.0 aiofiles>=25.1.0 aiohappyeyeballs>=2.6.1 -aiohttp>=3.13.2 +aiohttp>=3.13.5 aiosignal>=1.4.0 annotated-doc>=0.0.4 annotated-types>=0.7.0 antlr4-python3-runtime>=4.9.3 -anyio>=4.11.0 +anyio>=4.13.0 aqtp>=0.9.0 array-record>=0.8.3 -astroid>=4.0.2 +astroid>=4.0.4 astunparse>=1.6.3 attrs>=25.4.0 -auditwheel>=6.5.0 -black>=24.10.0 -blobfile>=3.1.0 -build>=1.3.0 -cachetools>=6.2.2 -certifi>=2025.11.12 +auditwheel>=6.6.0 +black>=25.12.0 +build>=1.4.0 +certifi>=2026.2.25 +cffi>=2.0.0 ; platform_python_implementation != 'PyPy' cfgv>=3.5.0 -charset-normalizer>=3.4.4 -cheroot>=11.1.2 +charset-normalizer>=3.4.6 chex>=0.1.91 click>=8.3.1 cloud-accelerator-diagnostics>=0.1.1 @@ -32,221 +30,210 @@ cloudpickle>=3.1.2 clu>=0.0.12 colorama>=0.4.6 contourpy>=1.3.3 -coverage>=7.12.0 +cryptography>=46.0.6 cycler>=0.12.1 -dacite>=1.9.2 -datasets>=4.4.1 +dataclasses-json>=0.6.7 +datasets>=4.8.4 decorator>=5.2.1 -dill>=0.4.0 +dill>=0.4.1 distlib>=0.4.0 -dm-tree>=0.1.9 +distro>=1.9.0 +dm-tree>=0.1.10 docstring-parser>=0.17.0 drjax>=0.1.4 editdistance>=0.8.1 -einops>=0.8.1 +einops>=0.8.2 einshape>=1.0 -etils>=1.13.0 -evaluate>=0.4.6 +etils>=1.14.0 execnet>=2.1.2 -fastapi>=0.122.0 -filelock>=3.20.0 -flatbuffers>=25.9.23 +fastapi>=0.135.3 +filelock>=3.20.3 +flatbuffers>=25.12.19 flax>=0.12.6 -fonttools>=4.60.1 +fonttools>=4.62.1 frozenlist>=1.8.0 -fsspec>=2025.10.0 -gast>=0.6.0 
-gcsfs>=2025.10.0 -google-api-core>=2.28.1 -google-api-python-client>=2.187.0 -google-auth-httplib2>=0.2.1 -google-auth-oauthlib>=1.2.2 -google-auth>=2.43.0 -google-cloud-aiplatform>=1.128.0 -google-cloud-appengine-logging>=1.7.0 -google-cloud-audit-log>=0.4.0 -google-cloud-bigquery>=3.38.0 -google-cloud-core>=2.5.0 -google-cloud-logging>=3.12.1 -google-cloud-mldiagnostics>=0.5.10 -google-cloud-monitoring>=2.28.0 -google-cloud-resource-manager>=1.15.0 -google-cloud-storage>=3.6.0 -google-crc32c>=1.7.1 -google-genai>=1.52.0 +fsspec>=2026.2.0 +gast>=0.7.0 +gcsfs>=2026.2.0 +google-api-core>=2.30.2 +google-api-python-client>=2.193.0 +google-auth-httplib2>=0.3.1 +google-auth-oauthlib>=1.3.1 +google-auth>=2.49.1 +google-cloud-aiplatform>=1.145.0 +google-cloud-appengine-logging>=1.9.0 +google-cloud-audit-log>=0.5.0 +google-cloud-bigquery>=3.41.0 +google-cloud-core>=2.5.1 +google-cloud-logging>=3.15.0 +google-cloud-mldiagnostics>=1.0.1 +google-cloud-monitoring>=2.30.0 +google-cloud-resource-manager>=1.17.0 +google-cloud-storage-control>=1.11.0 +google-cloud-storage>=3.10.1 +google-crc32c>=1.8.0 +google-genai>=1.70.0 google-pasta>=0.2.0 -google-resumable-media>=2.8.0 -google-tunix>=0.1.3 -googleapis-common-protos>=1.72.0 -grain>=0.2.15 -grpc-google-iam-v1>=0.14.3 -grpcio-status>=1.71.2 -grpcio>=1.76.0 -gspread>=6.2.1 +google-resumable-media>=2.8.2 +googleapis-common-protos>=1.74.0 +grain>=0.2.16 +grpc-google-iam-v1>=0.14.4 +grpcio-status>=1.78.0 +grpcio>=1.78.0 gviz-api>=1.10.0 h11>=0.16.0 -h5py>=3.15.1 -hf-transfer>=0.1.9 -hf-xet>=1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' +h5py>=3.14.0 +hf-xet>=1.4.3 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' httpcore>=1.0.9 -httplib2>=0.31.0 +httplib2>=0.31.2 httpx>=0.28.1 -huggingface-hub>=0.36.0 -humanize>=4.14.0 +huggingface-hub>=1.9.0 
+humanize>=4.15.0 hypothesis>=6.142.1 -identify>=2.6.15 +identify>=2.6.18 idna>=3.11 -immutabledict>=4.2.2 +immutabledict>=4.3.1 importlab>=0.8.1 -importlib-metadata>=8.7.0 -importlib-resources>=6.5.2 +importlib-metadata>=8.7.1 iniconfig>=2.3.0 -isort>=7.0.0 -jaraco-functools>=4.3.0 -jax>=0.8.1 -jaxlib>=0.8.1 -jaxtyping>=0.3.3 +isort>=8.0.1 +jax>=0.9.2 +jaxlib>=0.9.2 +jaxtyping>=0.3.9 jinja2>=3.1.6 -joblib>=1.5.2 jsonlines>=4.0.0 -kagglehub>=0.3.13 -keras>=3.12.0 -kiwisolver>=1.4.9 +keras>=3.13.2 +kiwisolver>=1.5.0 +latex2sympy2-extended>=1.11.0 libclang>=18.1.1 libcst>=1.8.6 -libtpu>=0.0.30 ; platform_machine == 'x86_64' and sys_platform == 'linux' -llvmlite>=0.45.1 -lxml>=6.0.2 +libtpu>=0.0.37 ; platform_machine == 'x86_64' and sys_platform == 'linux' markdown-it-py>=4.0.0 -markdown>=3.10 +markdown>=3.10.2 markupsafe>=3.0.3 +marshmallow>=3.26.2 math-verify>=0.9.0 -matplotlib>=3.10.7 +matplotlib>=3.10.8 mccabe>=0.7.0 mdurl>=0.1.2 ml-collections>=1.1.0 ml-dtypes>=0.5.4 -ml-goodput-measurement>=0.0.15 -more-itertools>=10.8.0 +ml-goodput-measurement>=0.0.16 mpmath>=1.3.0 msgpack>=1.1.2 msgspec>=0.20.0 -multidict>=6.7.0 -multiprocess>=0.70.18 +multidict>=6.7.1 +multiprocess>=0.70.19 mypy-extensions>=1.1.0 namex>=0.1.0 -nest-asyncio>=1.6.0 -networkx>=3.6 +networkx>=3.6.1 ninja>=1.13.0 -nltk>=3.9.2 -nodeenv>=1.9.1 -numba>=0.62.1 -numpy-typing-compat>=20250818.2.0 +nodeenv>=1.10.0 +numpy-typing-compat>=20251206.2.0 numpy>=2.0.2 +nvidia-cuda-cccl>=13.2.27 oauthlib>=3.3.1 omegaconf>=2.3.0 -opentelemetry-api>=1.38.0 +opentelemetry-api>=1.40.0 opt-einsum>=3.4.0 -optax>=0.2.6 -optree>=0.18.0 -optype>=0.14.0 +optax>=0.2.8 +optree>=0.19.0 +optype>=0.17.0 orbax-checkpoint>=0.11.33 -packaging>=25.0 -pandas>=2.3.3 +orbax-export>=0.0.8 +packaging>=26.0 +pandas>=3.0.2 parameterized>=0.9.0 -pathspec>=0.12.1 -pathwaysutils>=0.1.4 -pillow>=12.0.0 -platformdirs>=4.5.0 +pathspec>=1.0.4 +pathwaysutils>=0.1.6 +pillow>=12.1.1 +platformdirs>=4.9.4 pluggy>=1.6.0 portpicker>=1.6.0 
-pre-commit>=4.5.0 -prometheus-client>=0.23.1 +pre-commit>=4.5.1 promise>=2.3 propcache>=0.4.1 -proto-plus>=1.26.1 -protobuf>=5.29.5 -psutil>=7.1.3 -pyarrow>=22.0.0 +proto-plus>=1.27.2 +protobuf>=6.33.6 +psutil>=7.2.2 +pyarrow>=23.0.1 pyasn1-modules>=0.4.2 -pyasn1>=0.6.1 +pyasn1>=0.6.3 pycnite>=2024.7.31 -pycryptodomex>=3.23.0 +pycparser>=3.0 ; implementation_name != 'PyPy' and platform_python_implementation != 'PyPy' pydantic-core>=2.41.5 pydantic>=2.12.5 pydot>=4.0.1 pyelftools>=0.32 pyglove>=0.4.5 pygments>=2.19.2 -pyink>=24.10.1 -pylint>=4.0.3 -pyparsing>=3.2.5 +pyink>=25.12.0 +pylint>=4.0.5 +pyparsing>=3.3.2 pyproject-hooks>=1.2.0 pytest-xdist>=3.8.0 pytest>=8.4.2 python-dateutil>=2.9.0.post0 -python-dotenv>=1.2.1 +pytokens>=0.4.1 pytype>=2024.10.11 -pytz>=2025.2 pyyaml>=6.0.3 -qwix>=0.1.4 -regex>=2025.11.3 +qwix>=0.1.5 +regex>=2026.3.32 requests-oauthlib>=2.0.0 requests>=2.32.5 -rich>=14.2.0 -rsa>=4.9.1 +rich>=14.3.3 safetensors>=0.7.0 -scipy-stubs>=1.16.3.0 -scipy>=1.16.3 +scipy-stubs>=1.17.1.2 +scipy>=1.17.1 sentencepiece>=0.2.1 seqio>=0.0.20 -setuptools>=80.9.0 -shapely>=2.1.2 -shortuuid>=1.0.13 -simple-parsing>=0.1.7 +setuptools>=82.0.1 +shellingham>=1.5.4 +simple-parsing>=0.1.8 simplejson>=3.20.2 six>=1.17.0 sniffio>=1.3.1 sortedcontainers>=2.4.0 -starlette>=0.50.0 +starlette>=1.0.0 sympy>=1.14.0 -tabulate>=0.9.0 -tenacity>=9.1.2 +tabulate>=0.10.0 +tenacity>=9.1.4 tensorboard-data-server>=0.7.2 tensorboard-plugin-profile>=2.13.0 -tensorboard>=2.19.0 -tensorboardx>=2.6.4 +tensorboard>=2.20.0 +tensorboardx>=2.6.5 tensorflow-datasets>=4.9.9 -tensorflow-metadata>=1.17.2 -tensorflow-text>=2.19.0 -tensorflow>=2.19.1 -tensorstore>=0.1.79 -termcolor>=3.2.0 +tensorflow-metadata>=1.17.3 +tensorflow-text>=2.20.1 +tensorflow>=2.20.0 +tensorstore>=0.1.82 +termcolor>=3.3.0 tiktoken>=0.12.0 -tokamax>=0.0.8 -tokenizers>=0.22.1 +tokamax>=0.0.12 +tokenizers>=0.22.2 toml>=0.10.2 -tomlkit>=0.13.3 +tomlkit>=0.14.0 toolz>=1.1.0 -tqdm>=4.67.1 -transformers>=4.57.3 
+tqdm>=4.67.3 +transformers>=5.5.0 treescope>=0.1.10 typeguard>=2.13.3 +typer>=0.24.1 typing-extensions>=4.15.0 +typing-inspect>=0.9.0 typing-inspection>=0.4.2 -tzdata>=2025.2 +tzdata>=2026.1 ; sys_platform == 'emscripten' or sys_platform == 'win32' uritemplate>=4.2.0 -urllib3>=2.5.0 -uvicorn>=0.38.0 -uvloop>=0.19.0 -virtualenv>=20.35.4 +urllib3>=2.6.3 +uvicorn>=0.42.0 +uvloop>=0.22.1 +virtualenv>=20.36.1 wadler-lindig>=0.1.7 -websockets>=15.0.1 -werkzeug>=3.1.3 -wheel>=0.45.1 -wrapt>=2.0.1 -xprof>=2.21.1 +websockets>=16.0 +werkzeug>=3.1.8 +wheel>=0.46.3 +wrapt>=2.1.2 xxhash>=3.6.0 -yarl>=1.22.0 +yarl>=1.23.0 zipp>=3.23.0 -zstandard>=0.25.0 +zstandard>=0.25.0 \ No newline at end of file diff --git a/src/dependencies/scripts/install_post_train_extra_deps.py b/src/dependencies/scripts/install_post_train_extra_deps.py index eac8c158b9..7f8fbc49a9 100644 --- a/src/dependencies/scripts/install_post_train_extra_deps.py +++ b/src/dependencies/scripts/install_post_train_extra_deps.py @@ -33,8 +33,11 @@ def main(): current_dir = os.path.dirname(os.path.abspath(__file__)) repo_root = os.path.abspath(os.path.join(current_dir, "..", "..")) github_deps_path = os.path.join(repo_root, "dependencies", "extra_deps", "post_train_github_deps.txt") + overrides_deps_path = os.path.join(repo_root, "dependencies", "extra_deps", "post_train_overrides.txt") if not os.path.exists(github_deps_path): - raise FileNotFoundError(f"GitHub dependencies file not found at {github_deps_path}") + raise FileNotFoundError(f"Github dependencies file not found at {github_deps_path}") + if not os.path.exists(overrides_deps_path): + raise FileNotFoundError(f"Overrides file not found at {overrides_deps_path}") # Check if 'uv' is available in the environment try: @@ -45,6 +48,16 @@ def main(): print(f"Stderr: {e.stderr.decode()}") sys.exit(1) + override_deps_command = [ + sys.executable, # Use the current Python executable's pip to ensure the correct environment + "-m", + "uv", + "pip", + "install", + "-r", + 
str(overrides_deps_path), + ] + github_deps_command = [ sys.executable, # Use the current Python executable's pip to ensure the correct environment "-m", @@ -67,10 +80,15 @@ def main(): ] try: + # Run the command to install overrides first + print(f"Installing overrides: {' '.join(override_deps_command)}") + _ = subprocess.run(override_deps_command, check=True, capture_output=True, text=True) + print("Overrides installed successfully!") + # Run the command to install Github dependencies - print(f"Installing github dependencies: {' '.join(github_deps_command)}") + print(f"Installing Github dependencies: {' '.join(github_deps_command)}") _ = subprocess.run(github_deps_command, check=True, capture_output=True, text=True) - print("GitHub dependencies installed successfully!") + print("Github dependencies installed successfully!") # Run the command to install the MaxText vLLM directory print(f"Installing MaxText vLLM dependency: {' '.join(local_vllm_install_command)}") diff --git a/src/dependencies/scripts/install_pre_train_extra_deps.py b/src/dependencies/scripts/install_pre_train_extra_deps.py index 661cc91cab..310e246b8b 100644 --- a/src/dependencies/scripts/install_pre_train_extra_deps.py +++ b/src/dependencies/scripts/install_pre_train_extra_deps.py @@ -32,7 +32,7 @@ def main(): repo_root = os.path.abspath(os.path.join(current_dir, "..", "..")) github_deps_path = os.path.join(repo_root, "dependencies", "extra_deps", "pre_train_github_deps.txt") if not os.path.exists(github_deps_path): - raise FileNotFoundError(f"GitHub dependencies file not found at {github_deps_path}") + raise FileNotFoundError(f"Github dependencies file not found at {github_deps_path}") # Check if 'uv' is available in the environment try: @@ -55,9 +55,9 @@ def main(): ] try: - print(f"Installing github dependencies: {' '.join(github_deps_command)}") + print(f"Installing Github dependencies: {' '.join(github_deps_command)}") _ = subprocess.run(github_deps_command, check=True, capture_output=True, 
text=True) - print("GitHub dependencies installed successfully!") + print("Github dependencies installed successfully!") except subprocess.CalledProcessError as e: print("Failed to install extra dependencies.") print(f"Command '{' '.join(e.cmd)}' returned non-zero exit status {e.returncode}.") diff --git a/src/maxtext/checkpoint_conversion/utils/hf_model_configs.py b/src/maxtext/checkpoint_conversion/utils/hf_model_configs.py index 0d187fca46..0ad4047ca4 100644 --- a/src/maxtext/checkpoint_conversion/utils/hf_model_configs.py +++ b/src/maxtext/checkpoint_conversion/utils/hf_model_configs.py @@ -855,6 +855,7 @@ # TODO(shuningjin): replace with DeepseekV32Config when available in transformers library class DeepseekV32Config(PTConfig): + model_type = "deepseek_v32" def __init__(self, **kwargs): self.max_position_embeddings = kwargs.get("max_position_embeddings", 163840) diff --git a/src/maxtext/input_pipeline/distillation_data_processing.py b/src/maxtext/input_pipeline/distillation_data_processing.py index 44495a39e8..4aebb9169f 100644 --- a/src/maxtext/input_pipeline/distillation_data_processing.py +++ b/src/maxtext/input_pipeline/distillation_data_processing.py @@ -121,7 +121,9 @@ def filter_dataset(config, dataset, tokenizer): max_output_tokens = min(max_output_length, len(tokenizer.encode(actual_completion))) if config.use_chat_template: message = [{"role": "user", "content": prompt}] - prompt_token_ids = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=True) + prompt_token_ids = input_pipeline_utils.extract_token_ids( + tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=True) + ) else: prompt_token_ids = tokenizer.encode(prompt) diff --git a/src/maxtext/input_pipeline/input_pipeline_utils.py b/src/maxtext/input_pipeline/input_pipeline_utils.py index d8c93d141a..471f25e5ca 100644 --- a/src/maxtext/input_pipeline/input_pipeline_utils.py +++ b/src/maxtext/input_pipeline/input_pipeline_utils.py @@ -180,7 +180,7 @@ 
def is_conversational(features, data_columns): return False -def _extract_token_ids(tokens): +def extract_token_ids(tokens): """Extracts token IDs from various tokenizer output formats. This helper function standardizes the extraction of tokenized integer IDs @@ -248,21 +248,21 @@ def verify_chat_template_generation_prompt_logic(tokenizer_model): ) dummy_msgs.pop(0) prompt_wo_gen_tokens = tokenizer_model.apply_chat_template(dummy_msgs, add_generation_prompt=False, tokenize=True) - prompt_wo_gen_ids = _extract_token_ids(prompt_wo_gen_tokens) + prompt_wo_gen_ids = extract_token_ids(prompt_wo_gen_tokens) prompt_w_gen_tokens = tokenizer_model.apply_chat_template(dummy_msgs, add_generation_prompt=True, tokenize=True) - prompt_w_gen_ids = _extract_token_ids(prompt_w_gen_tokens) + prompt_w_gen_ids = extract_token_ids(prompt_w_gen_tokens) if prompt_w_gen_ids[: len(prompt_wo_gen_ids)] != prompt_wo_gen_ids: raise ValueError("Unable to extract generation prompt tokens.") # Extract the tokenized generation prompt (the expected assistant prefix) assistant_prefix = prompt_w_gen_ids[len(prompt_wo_gen_ids) :] - full_turn_tokens = _extract_token_ids( + full_turn_tokens = extract_token_ids( tokenizer_model.apply_chat_template( dummy_msgs + [{"role": "assistant", "content": "Dummy response"}], add_generation_prompt=False, tokenize=True ) ) - full_turn_ids = _extract_token_ids(full_turn_tokens) + full_turn_ids = extract_token_ids(full_turn_tokens) # Extract the actual tokens that appear right after the user message in the full turn actual_prefix_in_full_turn = full_turn_ids[len(prompt_wo_gen_ids) : len(prompt_wo_gen_ids) + len(assistant_prefix)] @@ -295,8 +295,8 @@ def _get_completion_in_chat_template(tokenizer_model, round_msgs): # include generation_prompt as part of the prompt tokens prompt_tokens = tokenizer_model.apply_chat_template(round_msgs[:-1], add_generation_prompt=True, tokenize=True) - prompt_completion_ids = _extract_token_ids(prompt_completion_tokens) - prompt_ids = 
_extract_token_ids(prompt_tokens) + prompt_completion_ids = extract_token_ids(prompt_completion_tokens) + prompt_ids = extract_token_ids(prompt_tokens) completion_tokens = prompt_completion_ids[len(prompt_ids) :] completion_in_chat_template = tokenizer_model.decode(completion_tokens, skip_special_tokens=False) diff --git a/tests/post_training/unit/sft_data_processing_test.py b/tests/post_training/unit/sft_data_processing_test.py index 9519fe978b..a1a9ab8a09 100644 --- a/tests/post_training/unit/sft_data_processing_test.py +++ b/tests/post_training/unit/sft_data_processing_test.py @@ -106,9 +106,9 @@ "tokenizer_path": None, "messages": { "truncated_exp1_inputs": ( - " [INST] <>\nthe system prompt\n<>\n\nexample one question one [/INST] " + "[INST] <>\nthe system prompt\n<>\n\nexample one question one [/INST] " "example one answer one " - " [INST] example one question two [/INST] " + "[INST] example one question two [/INST] " "example one answer two" ), "truncated_exp1_targets": ( @@ -122,11 +122,11 @@ "example one answer two" ), "packed_exp2_inputs": ( - " [INST] question two [/INST] " + "[INST] question two [/INST] " "answer two " - " [INST] question three [/INST] " + "[INST] question three [/INST] " "answer three " - " [INST] question four [/INST] " + "[INST] question four [/INST] " "answer four " "" ), @@ -149,11 +149,11 @@ }, "prompt_completion": { "truncated_exp1_inputs": ( - " [INST] example one question one [/INST] " + "[INST] example one question one [/INST] " "example one answer one " - " [INST] example one question two [/INST] " + "[INST] example one question two [/INST] " "example one answer two " - " [INST] example one question three [/INST] " + "[INST] example one question three [/INST] " "example one" ), "truncated_exp1_targets": ( @@ -173,11 +173,11 @@ "example one" ), "packed_exp2_inputs": ( - " [INST] question two [/INST] " + "[INST] question two [/INST] " "answer two " - " [INST] question three [/INST]" + "[INST] question three [/INST]" " answer 
three " - " [INST] question four [/INST]" + "[INST] question four [/INST]" " answer four " "" ), @@ -442,6 +442,7 @@ def test_sft_format_with_prompt_completion(self): data_iter = self.get_data_iterator(dataset, data_columns) batch = next(data_iter) + print("Iter output: ", self.tokenizer.decode(batch["inputs"][0])) # Check Truncation self.assertEqual(self.tokenizer.decode(batch["inputs"][0]), expected["truncated_exp1_inputs"]) diff --git a/tests/unit/attention_test.py b/tests/unit/attention_test.py index d498fb546c..c0d37912cc 100644 --- a/tests/unit/attention_test.py +++ b/tests/unit/attention_test.py @@ -266,6 +266,7 @@ class AttentionTest(parameterized.TestCase): "sa_block_kv_dkv_compute": 128, "sa_block_q_dq": 128, "sa_block_kv_dq": 128, + "log_config": False, } def setUp(self): diff --git a/tests/unit/train_compile_test.py b/tests/unit/train_compile_test.py index cb291e13bd..a583c40cdd 100644 --- a/tests/unit/train_compile_test.py +++ b/tests/unit/train_compile_test.py @@ -24,7 +24,9 @@ from tempfile import gettempdir import pytest +import transformers +from maxtext.checkpoint_conversion.utils.hf_model_configs import DeepseekV32Config from maxtext.trainers.pre_train.train_compile import main as train_compile_main from tests.utils.test_helpers import get_test_config_path @@ -893,6 +895,7 @@ def test_mhc_integration(self): def test_engram_integration(self): """AOT test for Engram implementation""" compiled_trainstep_file = "/tmp/test_engram_integration" + transformers.AutoConfig.register("deepseek_v32", DeepseekV32Config) train_compile_main( ( "",