From e7ca23413e90501d493df8183a1769b50fc5c752 Mon Sep 17 00:00:00 2001
From: Salman Mohammadi <salman.mohammadi@outlook.com>
Date: Thu, 8 Jan 2026 13:43:00 +0000
Subject: [PATCH 1/9] bump transformers dep from main to v5.0.0rc2

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index d34f468a1a..d2ae236219 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ packaging==23.2
 huggingface_hub>=1.1.7
 peft>=0.18.0
 tokenizers>=0.22.1
-transformers @ git+https://github.com/huggingface/transformers.git@main
+transformers @ git+https://github.com/huggingface/transformers.git@v5.0.0rc2
 accelerate==1.12.0
 datasets==4.4.2
 deepspeed>=0.18.3

From 324a404e10a3a362a3e8793ba37532788ed941fc Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 09:57:21 -0500
Subject: [PATCH 2/9] use latest fbgemm, grab model config as part of fixture,
 un-skip test

---
 setup.py                          |  2 +-
 tests/conftest.py                 | 15 +++++++++++----
 tests/e2e/multigpu/test_gemma3.py |  3 ---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/setup.py b/setup.py
index 10c9a84539..1257f35331 100644
--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@ def parse_requirements(extras_require_map):
 
             if (major, minor) >= (2, 9):
                 extras_require_map.pop("fbgemm-gpu")
-                extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.4.1"]
+                extras_require_map["fbgemm-gpu"] = ["fbgemm-gpu-genai==1.4.2"]
                 extras_require_map["vllm"] = ["vllm==0.11.1"]
             elif (major, minor) >= (2, 8):
                 extras_require_map.pop("fbgemm-gpu")
diff --git a/tests/conftest.py b/tests/conftest.py
index 4c8c80cb7f..1b494bef0c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -66,6 +66,9 @@ def snapshot_download_w_retry(*args, **kwargs):
         except LocalEntryNotFoundError:
             pass
     with hf_offline_context(False):
+        repo_type = kwargs.get("repo_type", "model")
+        if repo_type == "model":
+            _ = AutoConfig.from_pretrained(*args)
         return snapshot_download(*args, **kwargs)
 
 
@@ -251,7 +254,9 @@ def download_llama_1b_model_fixture():
 def download_llama3_8b_model_fixture():
     # download the tokenizer only
     snapshot_download_w_retry(
-        "NousResearch/Meta-Llama-3-8B", repo_type="model", allow_patterns=["*token*"]
+        "NousResearch/Meta-Llama-3-8B",
+        repo_type="model",
+        allow_patterns=["*token*", "config.json"],
     )
 
 
@@ -261,7 +266,7 @@ def download_llama3_8b_instruct_model_fixture():
     snapshot_download_w_retry(
         "NousResearch/Meta-Llama-3-8B-Instruct",
         repo_type="model",
-        allow_patterns=["*token*"],
+        allow_patterns=["*token*", "config.json"],
     )
 
 
@@ -269,7 +274,9 @@ def download_llama3_8b_instruct_model_fixture():
 def download_phi_35_mini_model_fixture():
     # download the tokenizer only
     snapshot_download_w_retry(
-        "microsoft/Phi-3.5-mini-instruct", repo_type="model", allow_patterns=["*token*"]
+        "microsoft/Phi-3.5-mini-instruct",
+        repo_type="model",
+        allow_patterns=["*token*", "config.json"],
     )
 
 
@@ -279,7 +286,7 @@ def download_phi_3_medium_model_fixture():
     snapshot_download_w_retry(
         "microsoft/Phi-3-medium-128k-instruct",
         repo_type="model",
-        allow_patterns=["*token*"],
+        allow_patterns=["*token*", "config.json"],
     )
 
 
diff --git a/tests/e2e/multigpu/test_gemma3.py b/tests/e2e/multigpu/test_gemma3.py
index 3c77c7107e..51ec68b116 100644
--- a/tests/e2e/multigpu/test_gemma3.py
+++ b/tests/e2e/multigpu/test_gemma3.py
@@ -28,9 +28,6 @@ class TestMultiGPUGemma3:
     Test case for Gemma3 models using LoRA
     """
 
-    @pytest.mark.skip(
-        reason="broken in transformers v5 due to embeddings bug fixed in https://github.com/huggingface/transformers/pull/42558"
-    )
     def test_lora_ddp_packed(self, temp_dir):
         cfg = DictDefault(
             {

From d4871db16cbf1d414fa2f75e790c9a577a717612 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 10:13:50 -0500
Subject: [PATCH 3/9] import AutoConfig

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 1b494bef0c..9c73ab8eb8 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -18,7 +18,7 @@
 from huggingface_hub import snapshot_download
 from huggingface_hub.errors import LocalEntryNotFoundError
 from tokenizers import AddedToken
-from transformers import AutoTokenizer
+from transformers import AutoConfig, AutoTokenizer
 
 from axolotl.utils.dict import DictDefault
 

From 57de5ba5c38f5440bc788b0ab160b0890f75052a Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 10:20:55 -0500
Subject: [PATCH 4/9] don't need more problematic autoconfig when specifying
 config.json manually

---
 tests/conftest.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 9c73ab8eb8..f0c9e01d63 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -18,7 +18,7 @@
 from huggingface_hub import snapshot_download
 from huggingface_hub.errors import LocalEntryNotFoundError
 from tokenizers import AddedToken
-from transformers import AutoConfig, AutoTokenizer
+from transformers import AutoTokenizer
 
 from axolotl.utils.dict import DictDefault
 
@@ -66,9 +66,6 @@ def snapshot_download_w_retry(*args, **kwargs):
         except LocalEntryNotFoundError:
             pass
     with hf_offline_context(False):
-        repo_type = kwargs.get("repo_type", "model")
-        if repo_type == "model":
-            _ = AutoConfig.from_pretrained(*args)
         return snapshot_download(*args, **kwargs)
 
 

From 7795590a5d61d1179c9a72e3af85d6130e02ffea Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 10:27:51 -0500
Subject: [PATCH 5/9] add fixtures for argilla ultrafeedback datasets

---
 tests/conftest.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index f0c9e01d63..fe5534b6d6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -83,6 +83,12 @@ def download_smollm2_135m_model():
     snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M", repo_type="model")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_smollm2_135m_instruct_model():
+    # download the model
+    snapshot_download_w_retry("HuggingFaceTB/SmolLM2-135M-Instruct", repo_type="model")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_smollm2_135m_gptq_model():
     # download the model
@@ -143,12 +149,20 @@ def download_argilla_distilabel_intel_orca_dpo_dataset():
     )
 
 
-# @pytest.fixture(scope="session", autouse=True)
-# def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset():
-#     # download the dataset
-#     snapshot_download_w_retry(
-#         "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset"
-#     )
+@pytest.fixture(scope="session", autouse=True)
+def download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset():
+    # download the dataset
+    snapshot_download_w_retry(
+        "argilla/ultrafeedback-binarized-preferences-cleaned", repo_type="dataset"
+    )
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_argilla_ultrafeedback_binarized_preferences_cleaned_kto_dataset():
+    # download the dataset
+    snapshot_download_w_retry(
+        "argilla/ultrafeedback-binarized-preferences-cleaned-kto", repo_type="dataset"
+    )
 
 
 # @pytest.fixture(scope="session", autouse=True)
@@ -566,6 +580,8 @@ def test_load_fixtures(
     download_mhenrichsen_alpaca_2k_dataset,
     download_mhenrichsen_alpaca_2k_w_revision_dataset,
     download_mlabonne_finetome_100k_dataset,
+    download_argilla_ultrafeedback_binarized_preferences_cleaned_dataset,
+    download_argilla_ultrafeedback_binarized_preferences_cleaned_kto_dataset,
     download_argilla_distilabel_capybara_dpo_7k_binarized_dataset,
     download_arcee_ai_distilabel_intel_orca_dpo_pairs_dataset,
     download_argilla_dpo_pairs_dataset,

From 68d2ce65e7b30f5275d43d1eb7c77c3ebce4eb73 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 10:32:58 -0500
Subject: [PATCH 6/9] download phi4-reasoning tokenizer and config.json in test fixtures

---
 tests/conftest.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index fe5534b6d6..b079ee3c5b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -291,6 +291,16 @@ def download_phi_35_mini_model_fixture():
     )
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_phi_4_reasoning_model_fixture():
+    # download the tokenizer only
+    snapshot_download_w_retry(
+        tokenizer="microsoft/Phi-4-reasoning",
+        repo_type="model",
+        allow_patterns=["*token*", "config.json"],
+    )
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_phi_3_medium_model_fixture():
     # download the tokenizer only
@@ -593,6 +603,7 @@ def test_load_fixtures(
     download_llama3_8b_instruct_model_fixture,
     download_phi_35_mini_model_fixture,
     download_phi_3_medium_model_fixture,
+    download_phi_4_reasoning_model_fixture,
     download_mistral_7b_model_fixture,
     download_gemma_2b_model_fixture,
     download_gemma2_9b_model_fixture,

From 76b1fc7003be35c3e9e65218877d140d401b09f5 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 10:33:54 -0500
Subject: [PATCH 7/9] fix arg: pass phi4-reasoning repo id positionally instead of tokenizer=

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b079ee3c5b..b542d377ba 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -295,7 +295,7 @@ def download_phi_35_mini_model_fixture():
 def download_phi_4_reasoning_model_fixture():
     # download the tokenizer only
     snapshot_download_w_retry(
-        tokenizer="microsoft/Phi-4-reasoning",
+        "microsoft/Phi-4-reasoning",
         repo_type="model",
         allow_patterns=["*token*", "config.json"],
     )

From ff253a202b5050a28b1c9044b00e12328cf8e45c Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Mon, 12 Jan 2026 12:45:41 -0500
Subject: [PATCH 8/9] update tests for phi fast tokenizer changes

---
 tests/prompt_strategies/test_chat_templates.py | 10 +++++-----
 tests/test_tokenizers.py                       |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/prompt_strategies/test_chat_templates.py b/tests/prompt_strategies/test_chat_templates.py
index 911a97922c..90e0e274b7 100644
--- a/tests/prompt_strategies/test_chat_templates.py
+++ b/tests/prompt_strategies/test_chat_templates.py
@@ -140,13 +140,13 @@ def test_phi35(self, phi35_tokenizer, assistant_dataset):
         # fmt: off
         expected_input_ids = [
             32010,  # user
-            12199, 32007,  # user eot
+            22172, 32007,  # user eot
             32001,  # assistant
-            12199, 32007,  # assistant eot
+            22172, 32007,  # assistant eot
             32010,  # user
-            16773, 26966, 32007,  # user eot
+            1781, 26966, 32007,  # user eot
             32001,  # assistant
-            16773, 26966, 32007,  # assistant eot
+            1781, 26966, 32007,  # assistant eot
         ]
         expected_labels = [
             -100,  # user
@@ -156,7 +156,7 @@ def test_phi35(self, phi35_tokenizer, assistant_dataset):
             -100,  # user
             -100, -100, -100,  # user eot
             -100,  # assistant
-            16773, 26966, 32007,  # assistant eot
+            1781, 26966, 32007,  # assistant eot
         ]
         # fmt: on
         LOG.debug(f"Expected input_ids: {expected_input_ids}")
diff --git a/tests/test_tokenizers.py b/tests/test_tokenizers.py
index f308efbef3..114c2bea2d 100644
--- a/tests/test_tokenizers.py
+++ b/tests/test_tokenizers.py
@@ -84,7 +84,7 @@ def test_add_additional_special_tokens(self):
             }
         )
         tokenizer = load_tokenizer(cfg)
-        assert tokenizer("<|im_start|>user")["input_ids"] == [1, 32000, 1792]
+        assert tokenizer("<|im_start|>user")["input_ids"] == [1, 32000, 1404]
         assert len(tokenizer) == 32001
 
         # ensure reloading the tokenizer again from cfg results in same vocab length

From 33c027da159cb89acbe53ecde2e89e25b74da8a7 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Wed, 14 Jan 2026 09:42:18 -0500
Subject: [PATCH 9/9] use explicit model types for gemma3

---
 tests/e2e/multigpu/test_gemma3.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/e2e/multigpu/test_gemma3.py b/tests/e2e/multigpu/test_gemma3.py
index 51ec68b116..4b74fcbb04 100644
--- a/tests/e2e/multigpu/test_gemma3.py
+++ b/tests/e2e/multigpu/test_gemma3.py
@@ -32,6 +32,8 @@ def test_lora_ddp_packed(self, temp_dir):
         cfg = DictDefault(
             {
                 "base_model": "axolotl-mirrors/gemma-3-4b-pt",
+                "model_type": "Gemma3ForCausalLM",
+                "cls_model_config": "Gemma3TextConfig",
                 "sequence_len": 2048,
                 "ddp_find_unused_parameters": True,
                 "sample_packing": True,