diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index eddce14384..ea721bff4d 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -25,32 +25,18 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
           - cuda: "128"
@@ -121,32 +107,25 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.9.1
+            pytorch: 2.9.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
           - cuda: "130"
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f34a0cf2f1..052f9aa72f 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,21 +15,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.0
-            axolotl_extras:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -46,6 +31,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.9.1
             axolotl_extras:
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -92,27 +82,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.0
-            axolotl_extras:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
-            is_latest:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -129,6 +98,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.9.1
             axolotl_extras:
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -170,22 +144,16 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             axolotl_extras:
             is_latest:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-            is_latest: true
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             axolotl_extras:
-            is_latest:
+            is_latest: true
     runs-on: axolotl-gpu-runner
diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
index 13162f8b19..1dd019dc79 100644
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -29,13 +29,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-            num_gpus: 2
-            nightly_build: "true"
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -46,7 +39,7 @@ jobs:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.9.0
+            pytorch: 2.9.1
             axolotl_extras: fbgemm-gpu
             num_gpus: 2
             nightly_build: "true"
diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml
index a24946ae99..d2c587cc7e 100644
--- a/.github/workflows/nightlies.yml
+++ b/.github/workflows/nightlies.yml
@@ -12,15 +12,15 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
@@ -64,15 +64,15 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index 53139fac1b..67b68a7e60 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -26,7 +26,7 @@ jobs:
       max-parallel: 2
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -99,17 +99,17 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             num_gpus: 1
             axolotl_extras:
             nightly_build: "true"
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
             nightly_build: "true"
@@ -148,10 +148,10 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.9.1
             num_gpus: 2
             axolotl_extras:
             nightly_build: "true"
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 9cf2315754..ae5ba17403 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -55,7 +55,7 @@ jobs:
       fail-fast: false
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -145,7 +145,7 @@ jobs:
       fail-fast: false
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -303,18 +303,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            num_gpus: 1
-            axolotl_extras:
-#          - cuda: 128
-#            cuda_version: 12.8.1
-#            python_version: "3.11"
-#            pytorch: 2.7.1
-#            num_gpus: 1
-#            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -325,7 +313,7 @@ jobs:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.9.0
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
     steps:
@@ -365,10 +353,10 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
     steps:
diff --git a/README.md b/README.md
index 01e0c44d9b..0521f7bedf 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Features:
 
 - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
 - Python 3.11
-- PyTorch ≥2.7.1
+- PyTorch ≥2.8.0
 
 ### Google Colab
 
diff --git a/docs/docker.qmd b/docs/docker.qmd
index da61843945..5d146eac23 100644
--- a/docs/docker.qmd
+++ b/docs/docker.qmd
@@ -32,11 +32,8 @@ main-base-py{python_version}-cu{cuda_version}-{pytorch_version}
 
 Tags examples:
 
-- `main-base-py3.11-cu128-2.7.1`
-- `main-base-py3.11-cu126-2.7.1`
-- `main-base-py3.11-cu126-2.7.0`
-- `main-base-py3.11-cu126-2.6.0`
-- `main-base-py3.11-cu124-2.6.0`
+- `main-base-py3.11-cu128-2.8.0`
+- `main-base-py3.11-cu128-2.9.1`
 
 ## Main
 
@@ -74,15 +71,12 @@ There may be some extra tags appended to the image, like `-vllm` which installs
 
 Tags examples:
 
-- `main-py3.11-cu128-2.7.1`
-- `main-py3.11-cu126-2.7.1`
-- `main-py3.11-cu126-2.7.0`
-- `main-py3.11-cu126-2.6.0`
-- `main-py3.11-cu124-2.6.0`
+- `main-py3.11-cu128-2.8.0`
+- `main-py3.11-cu128-2.9.1`
 - `main-latest`
 - `main-20250303-py3.11-cu124-2.6.0`
 - `main-20250303-py3.11-cu126-2.6.0`
-- `0.10.1`
+- `0.12.0`
 
 ## Cloud
 
diff --git a/docs/installation.qmd b/docs/installation.qmd
index 265ff238c1..b8d427eb00 100644
--- a/docs/installation.qmd
+++ b/docs/installation.qmd
@@ -26,7 +26,7 @@ Follow the instructions at: [https://pytorch.org/get-started/locally/](https://p
 :::
 
 ::: {.callout-important}
-For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
+For Blackwell GPUs, please use PyTorch 2.9.1 and CUDA 12.8.
 :::
 
 ### PyPI Installation (Recommended) {#sec-pypi}
@@ -111,7 +111,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it \
 :::
 
 ::: {.callout-important}
-For Blackwell GPUs, please use `axolotlai/axolotl:main-py3.11-cu128-2.7.0` or the cloud variant `axolotlai/axolotl-cloud:main-py3.11-cu128-2.7.0`.
+For Blackwell GPUs, please use `axolotlai/axolotl:main-py3.11-cu128-2.9.1` or the cloud variant `axolotlai/axolotl-cloud:main-py3.11-cu128-2.9.1`.
 :::
 
 Please refer to the [Docker documentation](docker.qmd) for more information on the different Docker images that are available.
diff --git a/src/axolotl/cli/cloud/baseten/template/train_sft.py b/src/axolotl/cli/cloud/baseten/template/train_sft.py
index 137fb91714..6dcf477c79 100644
--- a/src/axolotl/cli/cloud/baseten/template/train_sft.py
+++ b/src/axolotl/cli/cloud/baseten/template/train_sft.py
@@ -24,8 +24,7 @@
     launcher_args_str = "-- " + " ".join(launcher_args)
 
 # 1. Define a base image for your training job
-# must use torch 2.7.0 for vllm
-BASE_IMAGE = "axolotlai/axolotl:main-py3.11-cu126-2.7.1"
+BASE_IMAGE = "axolotlai/axolotl:main-py3.11-cu128-2.9.1"
 
 # 2. Define the Runtime Environment for the Training Job
-# This includes start commands and environment variables.a
+# This includes start commands and environment variables.
diff --git a/src/axolotl/cli/cloud/modal_.py b/src/axolotl/cli/cloud/modal_.py
index 7f953372d4..3e703a4946 100644
--- a/src/axolotl/cli/cloud/modal_.py
+++ b/src/axolotl/cli/cloud/modal_.py
@@ -82,7 +82,7 @@ def get_env(self):
         return res
 
     def get_image(self):
-        docker_tag = "main-py3.11-cu126-2.7.1"
+        docker_tag = "main-py3.11-cu128-2.9.1"
         if self.config.docker_tag:
             docker_tag = self.config.docker_tag
         docker_image = f"axolotlai/axolotl:{docker_tag}"