diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index ed987558fe..a9deb323a7 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -35,14 +35,17 @@ jobs: cudnn_version: "" python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-base" - platforms: "linux/amd64,linux/arm64" + # arm64 disabled: torchvision 0.24.1+cu128 has no aarch64 wheel + platforms: "linux/amd64" - cuda: "128" cuda_version: 12.8.1 cudnn_version: "" python_version: "3.12" pytorch: 2.10.0 + torchvision: 0.25.0 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-base" platforms: "linux/amd64,linux/arm64" @@ -51,6 +54,7 @@ jobs: # cudnn_version: "" # python_version: "3.12" # pytorch: 2.9.1 +# torchvision: 0.24.1 # torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" # dockerfile: "Dockerfile-base" # platforms: "linux/amd64,linux/arm64" @@ -59,14 +63,17 @@ jobs: cudnn_version: "" python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-base" - platforms: "linux/amd64,linux/arm64" + # arm64 disabled: torchvision 0.24.1+cu130 has no aarch64 wheel + platforms: "linux/amd64" - cuda: "130" cuda_version: 13.0.0 cudnn_version: "" python_version: "3.12" pytorch: 2.10.0 + torchvision: 0.25.0 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-base" platforms: "linux/amd64,linux/arm64" @@ -75,6 +82,7 @@ jobs: # cudnn_version: "" # python_version: "3.11" # pytorch: nightly +# torchvision: nightly # torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" # dockerfile: "Dockerfile-base-nightly" # # "next" is for release candidates of pytorch @@ -83,6 +91,7 @@ jobs: # cudnn_version: "" # python_version: "3.11" # pytorch: next +# torchvision: next # torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" # dockerfile: "Dockerfile-base-next" steps: @@ -117,6 +126,7 @@ jobs: CUDA=${{ matrix.cuda }} PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch }} + TORCHVISION_VERSION=${{ matrix.torchvision }} TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }} build-base-uv: if: ${{ github.repository_owner == 'axolotl-ai-cloud' && (github.event_name != 'pull_request' || !github.event.pull_request.draft) }} @@ -133,6 +143,7 @@ jobs: cudnn_version: "" python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -141,6 +152,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -149,6 +161,7 @@ jobs: cudnn_version: "" python_version: "3.11" pytorch: 2.10.0 + torchvision: 0.25.0 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -157,6 +170,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.10.0 + torchvision: 0.25.0 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -165,6 +179,7 @@ jobs: # cudnn_version: "" # python_version: "3.12" # pytorch: 2.9.1 +# torchvision: 0.24.1 # torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" # dockerfile: "Dockerfile-uv-base" # platforms: "linux/amd64,linux/arm64" @@ -173,6 +188,7 @@ jobs: cudnn_version: "" python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -181,6 +197,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.9.1 + torchvision: 0.24.1 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -189,6 +206,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.10.0 + torchvision: 0.25.0 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -197,6 +215,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.11.0 + torchvision: 0.26.0 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -205,6 +224,7 @@ jobs: cudnn_version: "" python_version: "3.12" pytorch: 2.12.0 + torchvision: 0.27.0 torch_cuda_arch_list: "9.0 10.0 10.3 12.0+PTX" dockerfile: "Dockerfile-uv-base" platforms: "linux/amd64,linux/arm64" @@ -240,4 +260,5 @@ jobs: CUDA=${{ matrix.cuda }} PYTHON_VERSION=${{ matrix.python_version }} PYTORCH_VERSION=${{ matrix.pytorch }} + TORCHVISION_VERSION=${{ matrix.torchvision }} TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }} diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 81f89da887..c79f41c291 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -35,6 +35,7 @@ jobs: # cuda_version: 12.9.1 # python_version: "3.12" # pytorch: 2.9.1 + # torchvision: 0.24.1 # axolotl_extras: "fbgemm-gpu" # num_gpus: 2 # dockerfile: "Dockerfile-uv.jinja" @@ -42,6 +43,7 @@ jobs: cuda_version: 13.0.0 python_version: "3.12" pytorch: 2.12.0 + torchvision: 0.27.0 axolotl_extras: # axolotl_extras: fbgemm-gpu num_gpus: 2 @@ -49,6 +51,7 @@ jobs: cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.10.0 + torchvision: 0.25.0 axolotl_extras: "fbgemm-gpu" num_gpus: 2 runs-on: [self-hosted, modal] @@ -68,6 +71,7 @@ jobs: run: | echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "TORCHVISION_VERSION=${{ matrix.torchvision}}" >> $GITHUB_ENV echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 1be17317ee..314fcac3b0 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -119,6 +119,7 @@ jobs: cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 num_gpus: 1 axolotl_extras: nightly_build: "true" @@ -126,12 +127,14 @@ jobs: cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.10.0 + torchvision: 0.25.0 num_gpus: 1 axolotl_extras: - cuda: 130 cuda_version: 13.0.0 python_version: "3.12" pytorch: 2.9.1 + torchvision: 0.24.1 num_gpus: 1 axolotl_extras: nightly_build: "true" @@ -150,6 +153,7 @@ jobs: run: | echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "TORCHVISION_VERSION=${{ matrix.torchvision}}" >> $GITHUB_ENV echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV @@ -176,6 +180,7 @@ jobs: cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.9.1 + torchvision: 0.24.1 num_gpus: 2 axolotl_extras: nightly_build: "true" @@ -194,6 +199,7 @@ jobs: run: | echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "TORCHVISION_VERSION=${{ matrix.torchvision}}" >> $GITHUB_ENV echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d724b644c..9efacfebc8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -288,6 +288,7 @@ jobs: cuda_version: 13.0.0 python_version: "3.12" pytorch: 2.12.0 + torchvision: 0.27.0 num_gpus: 1 axolotl_extras: steps: @@ -305,6 +306,7 @@ jobs: run: | echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "TORCHVISION_VERSION=${{ matrix.torchvision}}" >> $GITHUB_ENV echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV @@ -337,12 +339,14 @@ jobs: cuda_version: 12.8.1 python_version: "3.11" pytorch: 2.10.0 + torchvision: 0.25.0 num_gpus: 1 axolotl_extras: - cuda: 130 cuda_version: 13.0.0 python_version: "3.12" pytorch: 2.11.0 + torchvision: 0.26.0 num_gpus: 1 axolotl_extras: steps: @@ -360,6 +364,7 @@ jobs: run: | echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV + echo "TORCHVISION_VERSION=${{ matrix.torchvision}}" >> $GITHUB_ENV echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV diff --git a/cicd/Dockerfile-uv.jinja b/cicd/Dockerfile-uv.jinja index acf84ad1d9..57fb9aa10e 100644 --- a/cicd/Dockerfile-uv.jinja +++ b/cicd/Dockerfile-uv.jinja @@ -1,10 +1,12 @@ FROM axolotlai/axolotl-base-uv:{{ BASE_TAG }} +ENV VIRTUAL_ENV="/workspace/axolotl-venv" ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX" ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}" ENV AXOLOTL_ARGS="{{ AXOLOTL_ARGS }}" ENV CUDA="{{ CUDA }}" ENV PYTORCH_VERSION="{{ PYTORCH_VERSION }}" +ENV TORCHVISION_VERSION="{{ TORCHVISION_VERSION }}" ENV GITHUB_REF="{{ GITHUB_REF }}" ENV GITHUB_SHA="{{ GITHUB_SHA }}" ENV NIGHTLY_BUILD="{{ NIGHTLY_BUILD }}" @@ -23,13 +25,14 @@ RUN git fetch origin +$GITHUB_REF && \ git checkout FETCH_HEAD RUN uv pip install packaging==26.0 setuptools==78.1.1 -RUN uv pip install torchvision RUN uv pip uninstall causal_conv1d -RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ - uv pip install --no-build-isolation -e .[deepspeed,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS; \ +RUN uv pip freeze | grep -E "^(torch|torchvision)==" > /tmp/torch-pin.txt && \ + if [ "$AXOLOTL_EXTRAS" != "" ] ; then \ + uv pip install --no-build-isolation -e .[deepspeed,optimizers,ray,$AXOLOTL_EXTRAS] $AXOLOTL_ARGS --override /tmp/torch-pin.txt; \ else \ - uv pip install --no-build-isolation -e .[deepspeed,optimizers,ray] $AXOLOTL_ARGS; \ - fi + uv pip install --no-build-isolation -e .[deepspeed,optimizers,ray] $AXOLOTL_ARGS --override /tmp/torch-pin.txt; \ + fi && \ + python -c "import torch, torchvision; torchvision.ops.nms; print('OK', torch.__version__, torchvision.__version__)" # Override with nightly HF packages for nightly builds RUN if [ "$NIGHTLY_BUILD" = "true" ] ; then \ diff --git a/cicd/multigpu.py b/cicd/multigpu.py index 5ee0bc49ab..4b982fcd52 100644 --- a/cicd/multigpu.py +++ b/cicd/multigpu.py @@ -24,6 +24,7 @@ "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "TORCHVISION_VERSION": os.environ.get("TORCHVISION_VERSION", "0.21.0"), "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), diff --git a/cicd/single_gpu.py b/cicd/single_gpu.py index 9bd8d990e5..ba30293eb0 100644 --- a/cicd/single_gpu.py +++ b/cicd/single_gpu.py @@ -23,6 +23,7 @@ "AXOLOTL_EXTRAS": os.environ.get("AXOLOTL_EXTRAS", ""), "AXOLOTL_ARGS": os.environ.get("AXOLOTL_ARGS", ""), "PYTORCH_VERSION": os.environ.get("PYTORCH_VERSION", "2.6.0"), + "TORCHVISION_VERSION": os.environ.get("TORCHVISION_VERSION", "0.21.0"), "BASE_TAG": os.environ.get("BASE_TAG", "main-base-py3.11-cu126-2.6.0"), "CUDA": os.environ.get("CUDA", "126"), "GITHUB_REF": os.environ.get("GITHUB_REF", "refs/heads/main"), diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 08a5ddccd2..46c25a5a58 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -10,7 +10,8 @@ ENV PATH="/root/miniconda3/bin:${PATH}" ARG TARGETARCH ARG PYTHON_VERSION="3.11" -ARG PYTORCH_VERSION="2.1.2" +ARG PYTORCH_VERSION="2.9.1" +ARG TORCHVISION_VERSION="0.24.1" ARG CUDA="128" ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX" @@ -44,8 +45,9 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace RUN python3 -m pip install --upgrade pip && pip3 install -U packaging==26.0 setuptools==75.8.0 wheel psutil && \ - python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \ - python3 -m pip cache purge + python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} torchvision==${TORCHVISION_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA && \ + python3 -m pip cache purge && \ + python3 -c "import torch, torchvision; torchvision.ops.nms; print('OK', torch.__version__, torchvision.__version__)" RUN if [ "$CUDA" != "130" ] ; then \ CAUSAL_CONV1D_FORCE_CXX11_ABI=TRUE CAUSAL_CONV1D_FORCE_BUILD=TRUE python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@v1.5.4"; \ diff --git a/docker/Dockerfile-uv b/docker/Dockerfile-uv index c57160d343..ac8dc257cb 100644 --- a/docker/Dockerfile-uv +++ b/docker/Dockerfile-uv @@ -1,6 +1,8 @@ ARG BASE_TAG=main-base FROM axolotlai/axolotl-base-uv:$BASE_TAG +ENV VIRTUAL_ENV="/workspace/axolotl-venv" + ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" ARG AXOLOTL_EXTRAS="" ARG AXOLOTL_ARGS="" diff --git a/docker/Dockerfile-uv-base b/docker/Dockerfile-uv-base index c5a2ceb8cc..00bbf3972d 100644 --- a/docker/Dockerfile-uv-base +++ b/docker/Dockerfile-uv-base @@ -9,6 +9,7 @@ FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION A ARG TARGETARCH ARG PYTHON_VERSION="3.11" ARG PYTORCH_VERSION="2.6.0" +ARG TORCHVISION_VERSION="0.21.0" ARG CUDA="126" ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX" @@ -30,10 +31,12 @@ WORKDIR /workspace RUN uv venv --no-project --relocatable axolotl-venv ENV PATH="/workspace/axolotl-venv/bin:${PATH}" +ENV VIRTUAL_ENV="/workspace/axolotl-venv" RUN uv pip install packaging setuptools wheel psutil \ - && uv pip install torch==${PYTORCH_VERSION} torchvision \ - && uv pip install awscli pydantic + && uv pip install torch==${PYTORCH_VERSION} torchvision==${TORCHVISION_VERSION} \ + && uv pip install awscli pydantic \ + && python -c "import torch, torchvision; torchvision.ops.nms; print('OK', torch.__version__, torchvision.__version__)" RUN if [ "$TARGETARCH" = "amd64" ]; then \ MAMBA_SKIP_CUDA_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE uv pip install --no-build-isolation mamba_ssm causal_conv1d; \ diff --git a/pyproject.toml b/pyproject.toml index 01cac4b8b0..b44c8b9517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ requires-python = ">=3.10" dependencies = [ # Core ML stack "torch>=2.9.1", + "torchvision>=0.24.1", "packaging==26.0", "huggingface_hub>=1.1.7", "peft>=0.19.1,<0.20.0", diff --git a/src/axolotl/utils/optimizers/qgalore.py b/src/axolotl/utils/optimizers/qgalore.py index 9e2cc82607..93c49d0f59 100644 --- a/src/axolotl/utils/optimizers/qgalore.py +++ b/src/axolotl/utils/optimizers/qgalore.py @@ -1,4 +1,14 @@ -"""Helpers for the Q-GaLore optimizer integration.""" +"""Helpers for the Q-GaLore optimizer integration. + +Q-GaLore (arxiv 2407.08296) projects gradients into a low-rank subspace using a +periodically-refreshed projection matrix P. The upstream wheel +(``q-galore-torch``) exposes ``QGaLoreAdamW8bit``; it discovers which parameters +to project by reading a ``rank`` key on each ``param_group``. This module +builds those param-groups from an Axolotl config. + +The companion INT8-weight-wrapping recipe from the paper is not yet wired up +(see ``check_qgalore`` in :mod:`axolotl.utils.schemas.validation`). +""" from __future__ import annotations @@ -55,19 +65,31 @@ def build_qgalore_param_groups( gamma_proj: int, queue_size: int, ) -> list[dict]: - """Two param-groups: 2D weights matching ``target_modules`` get the Q-GaLore - projection keys; everything else (norms, biases, embeddings) is plain AdamW.""" + """Split ``model``'s trainable parameters into two groups for Q-GaLore. + + The first group carries the Q-GaLore projection settings (``rank``, + ``update_proj_gap`` etc.). The second is a plain AdamW group for everything + that wasn't matched by ``target_modules`` (norms, biases, embeddings, …). + + ``target_modules`` is a list of substring patterns matched against + parameter names — identical semantics to ``optim_target_modules`` for the + upstream HuggingFace GaLore integration. + """ galore, plain = [], [] for name, p in model.named_parameters(): if not p.requires_grad: continue + # Only 2D weight matrices benefit from the low-rank projection; 1D + # tensors (norms, biases) go to the plain AdamW group. if p.dim() == 2 and any(t in name for t in target_modules): galore.append(p) else: plain.append(p) if not galore: raise ValueError( - f"Q-GaLore: no parameters matched optim_target_modules={target_modules!r}" + "Q-GaLore: no parameters matched optim_target_modules=" + f"{target_modules!r}. Check the pattern list against the model's " + "parameter names." ) LOG.info("Q-GaLore param groups: %d projected, %d plain", len(galore), len(plain)) return [ diff --git a/src/axolotl/utils/schemas/enums.py b/src/axolotl/utils/schemas/enums.py index d783983c0c..fca9adc7a4 100644 --- a/src/axolotl/utils/schemas/enums.py +++ b/src/axolotl/utils/schemas/enums.py @@ -90,6 +90,7 @@ class CustomSupportedOptimizers(str, Enum): came_pytorch = "came_pytorch" muon = "muon" dion = "dion" + q_galore_adamw8bit = "q_galore_adamw8bit" flash_adamw = "flash_adamw" flash_adam = "flash_adam" flash_sgd = "flash_sgd"