From 2c91d5c61cc8886b6fb5266c08c64f582d38d98f Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 24 Jun 2025 10:18:18 -0400 Subject: [PATCH 1/6] upgrade to flash-attn 2.8.0.post2 --- docker/Dockerfile-base | 4 ---- docker/Dockerfile-uv-base | 4 ---- setup.py | 4 ++-- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index cc9ca2f2d5..52201f276e 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -37,7 +37,3 @@ RUN git lfs install --skip-repo && \ pip3 install awscli && \ # The base image ships with `pydantic==1.8.2` which is not working pip3 install -U --no-cache-dir pydantic==1.10.10 - -RUN if [ "$PYTORCH_VERSION" = "2.7.1" ] ; then \ - pip3 install flash-attn==2.7.4.post1; \ - fi diff --git a/docker/Dockerfile-uv-base b/docker/Dockerfile-uv-base index c612278aec..4b08e55f8e 100644 --- a/docker/Dockerfile-uv-base +++ b/docker/Dockerfile-uv-base @@ -34,7 +34,3 @@ RUN uv pip install packaging setuptools wheel psutil \ && uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \ && uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \ && uv pip install awscli pydantic - -RUN if [ "$PYTORCH_VERSION" = "2.7.1" ] ; then \ - uv pip install --no-build-isolation flash-attn==2.7.4.post1; \ - fi diff --git a/setup.py b/setup.py index 08c39c71c8..212625bddd 100644 --- a/setup.py +++ b/setup.py @@ -111,9 +111,9 @@ def get_package_version(): extras_require = { - "flash-attn": ["flash-attn==2.7.4.post1"], + "flash-attn": ["flash-attn==2.8.0.post2"], "ring-flash-attn": [ - "flash-attn==2.7.4.post1", + "flash-attn==2.8.0.post2", "ring-flash-attn>=0.1.4", "yunchang==0.6.0", ], From cc25a2e7c7607ef51ad2d9be5bba4c0b95d3b433 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Fri, 27 Jun 2025 11:42:19 -0400 Subject: [PATCH 2/6] use cu126 with torch 2.6 --- .github/workflows/tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bb865e98d4..7ce070c336 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -195,8 +195,8 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 @@ -247,8 +247,8 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 From 042561614afeff60000294319970d2efa3ad9a09 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sat, 28 Jun 2025 11:36:05 -0400 Subject: [PATCH 3/6] seems vllm 0.8.5.post1 not compatible with cuda12.6.3 and torch 2.6 --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7ce070c336..7e4e73f71b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -200,7 +200,7 @@ jobs: python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 - axolotl_extras: vllm + axolotl_extras: - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" From 8707a0c274d8cbcdc617d3f1bce1b4e80b6e2dad Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sat, 28 Jun 2025 15:34:33 -0400 Subject: [PATCH 4/6] cu126 + torch 2.6 as the default --- .github/workflows/main.yml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 7ff7127574..29cd2556d5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -20,12 +20,11 @@ jobs: python_version: "3.11" pytorch: 2.5.1 axolotl_extras: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: vllm - is_latest: true - cuda: 126 cuda_version: 12.6.3 python_version: "3.11" @@ -88,8 +87,8 @@ jobs: python_version: "3.11" pytorch: 2.5.1 axolotl_extras: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: @@ -146,8 +145,8 @@ jobs: strategy: matrix: include: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: From 3ab6c3400347e39797a0cd0a330253b8bec00ee4 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sat, 28 Jun 2025 20:37:06 -0400 Subject: [PATCH 5/6] use cu126 for multigpu w torch 2.6 too --- .github/workflows/multi-gpu-e2e.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index deea0ed299..f8a3af0d64 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -26,8 +26,8 @@ jobs: fail-fast: false matrix: include: - - cuda: 124 - cuda_version: 12.4.1 + - cuda: 126 + cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 axolotl_extras: vllm From c0a250bdb9fc37bdbf1d9f431da5f18604374970 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 29 Jun 2025 12:22:20 -0400 Subject: [PATCH 6/6] drop vllm for now from ci for now --- .github/workflows/multi-gpu-e2e.yml | 2 +- .github/workflows/tests.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index f8a3af0d64..09d9663a9c 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -30,7 +30,7 @@ jobs: cuda_version: 12.6.3 python_version: "3.11" pytorch: 2.6.0 - axolotl_extras: vllm + axolotl_extras: num_gpus: 2 nightly_build: "true" - cuda: 124 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7e4e73f71b..b489e27b85 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -311,7 +311,7 @@ jobs: python_version: "3.11" pytorch: 2.6.0 num_gpus: 1 - axolotl_extras: vllm + axolotl_extras: steps: - name: Checkout uses: actions/checkout@v4