Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/axolotl/loaders/patch_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,13 @@ def _apply_adapter_patches(self):

patch_peft_prep_code()

if self.cfg.adapter:
from axolotl.monkeypatch.peft_modules_to_save import (
patch_peft_modules_to_save_kwargs,
)

patch_peft_modules_to_save_kwargs()

def _apply_flex_attention_patches(self):
"""Apply patches for flexible attention."""
if self.cfg.flex_attention:
Expand Down
57 changes: 57 additions & 0 deletions src/axolotl/monkeypatch/peft_modules_to_save.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"""Patch PEFT's AuxiliaryTrainingWrapper / ModulesToSaveWrapper so kwargs-only
forward calls work (e.g. Gemma 4 vision_tower / embed_vision in lora_modules_to_save)."""

from axolotl.utils.logging import get_logger

LOG = get_logger(__name__)

_PATCHED_ATTR = "_axolotl_modules_to_save_kwargs_patched"


def _patched_forward(self, *args, **kwargs):
# _check_forward_args only validates len(x) vs len(adapter_names); skip when no positional x.
if args:
self._check_forward_args(*args, **kwargs)

adapter_names = kwargs.pop("adapter_names", None)

if self.disable_adapters or any(
adapter not in self._adapters for adapter in self.active_adapters
):
return self._forward_wrapped_passthrough(*args, **kwargs)

if adapter_names is None:
return self._forward_wrapped(*args, **kwargs)
# Mixed-batch path needs positional input for sub-batch indexing; leave unchanged.
return self._mixed_batch_forward(*args, adapter_names=adapter_names, **kwargs)


def _patched_forward_wrapped(self, *args, **kwargs):
if not self.active_adapters:
return self._forward_wrapped_passthrough(*args, **kwargs)
return self.modules_to_save[self.active_adapters[0]](*args, **kwargs)


def _patched_forward_wrapped_passthrough(self, *args, **kwargs):
return self.original_module(*args, **kwargs)


def patch_peft_modules_to_save_kwargs() -> None:
"""Apply the kwargs-compatible forward patch to PEFT. Idempotent."""
from peft.utils.other import AuxiliaryTrainingWrapper, ModulesToSaveWrapper

if getattr(AuxiliaryTrainingWrapper, _PATCHED_ATTR, False):
return

AuxiliaryTrainingWrapper.forward = _patched_forward
ModulesToSaveWrapper._forward_wrapped = _patched_forward_wrapped
ModulesToSaveWrapper._forward_wrapped_passthrough = (
_patched_forward_wrapped_passthrough
)

setattr(AuxiliaryTrainingWrapper, _PATCHED_ATTR, True)
LOG.debug(
"Patched peft.AuxiliaryTrainingWrapper / ModulesToSaveWrapper to accept "
"kwargs-only forward calls (enables full-FT of modules called with "
"keyword args, e.g. Gemma 4 vision_tower/embed_vision)"
)
148 changes: 148 additions & 0 deletions tests/monkeypatch/test_peft_modules_to_save.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Tests for axolotl.monkeypatch.peft_modules_to_save."""

import pytest
import torch
import torch.nn as nn

pytest.importorskip("peft")


@pytest.fixture(scope="module")
def patched():
from axolotl.monkeypatch.peft_modules_to_save import (
patch_peft_modules_to_save_kwargs,
)

patch_peft_modules_to_save_kwargs()
yield


@pytest.fixture
def wrap_module(patched):
"""Wrap ``module`` like PEFT wraps a ``modules_to_save`` entry."""
from peft.utils.other import ModulesToSaveWrapper

def _wrap(module: nn.Module) -> ModulesToSaveWrapper:
w = ModulesToSaveWrapper(module, "default")
w.set_adapter("default")
return w

return _wrap


def test_kwargs_only_forward(wrap_module):
"""Gemma 4 vision_tower shape: self.vision_tower(pixel_values=...)."""

class KwargsOnly(nn.Module):
def __init__(self):
super().__init__()
self.lin = nn.Linear(8, 8)

def forward(self, *, pixel_values):
return self.lin(pixel_values)

w = wrap_module(KwargsOnly())
out = w(pixel_values=torch.randn(2, 8))
assert out.shape == (2, 8)


def test_positional_only_forward(wrap_module):
"""embed_tokens shape: self.embed_tokens(input_ids)."""

class PosOnly(nn.Module):
def __init__(self):
super().__init__()
self.emb = nn.Embedding(100, 8)

def forward(self, x):
return self.emb(x)

w = wrap_module(PosOnly())
out = w(torch.tensor([1, 2, 3]))
assert out.shape == (3, 8)


def test_mixed_args_kwargs_forward(wrap_module):
"""Module taking both positional and keyword args."""

class Mixed(nn.Module):
def __init__(self):
super().__init__()
self.lin = nn.Linear(8, 8)

def forward(self, x, scale=1.0):
return self.lin(x) * scale

w = wrap_module(Mixed())
out = w(torch.randn(2, 8), scale=2.0)
assert out.shape == (2, 8)


def test_kwargs_only_disabled_adapter(wrap_module):
"""Passthrough branch (enable_adapters(False)) must also accept kwargs-only."""

class KwargsOnly(nn.Module):
def __init__(self):
super().__init__()
self.lin = nn.Linear(8, 8)

def forward(self, *, pixel_values):
return self.lin(pixel_values)

w = wrap_module(KwargsOnly())
w.enable_adapters(False)

out = w(pixel_values=torch.randn(2, 8))
assert out.shape == (2, 8)


def test_trainable_tokens_wrapper_not_broken(patched):
"""Base-class forward replacement must not break TrainableTokensWrapper
(positional embed_tokens path must still work)."""
try:
from peft.utils.other import TrainableTokensWrapper
except ImportError:
pytest.skip("TrainableTokensWrapper not available in installed PEFT")

try:
w = TrainableTokensWrapper(
nn.Embedding(100, 8), "default", token_indices=[0, 1, 2]
)
except TypeError as exc:
pytest.skip(f"TrainableTokensWrapper init signature changed: {exc}")
w.set_adapter("default")

out = w(torch.tensor([0, 1, 2, 5]))
assert out.shape == (4, 8)


def test_mixed_batch_kwargs_only_raises(wrap_module):
"""Document current behavior: adapter_names without positional input is
unsupported (mixed-batch path is deliberately not kwargs-patched)."""

class KwargsOnly(nn.Module):
def __init__(self):
super().__init__()
self.lin = nn.Linear(8, 8)

def forward(self, *, pixel_values):
return self.lin(pixel_values)

w = wrap_module(KwargsOnly())
with pytest.raises(TypeError):
w(pixel_values=torch.randn(2, 8), adapter_names=["default", "default"])


def test_patch_is_idempotent():
"""Applying the patch twice is a no-op."""
from peft.utils.other import AuxiliaryTrainingWrapper

from axolotl.monkeypatch.peft_modules_to_save import (
patch_peft_modules_to_save_kwargs,
)

patch_peft_modules_to_save_kwargs()
first = AuxiliaryTrainingWrapper.forward
patch_peft_modules_to_save_kwargs()
second = AuxiliaryTrainingWrapper.forward
assert first is second, "patch must be idempotent"
Loading