From 451fe5f0dab94b9147eb8a17ff14458cfed78b32 Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 11:14:19 +0700
Subject: [PATCH 1/8] feat: add arcee

---
 examples/colab-notebooks/colab-axolotl-example.ipynb   | 2 +-
 scripts/cutcrossentropy_install.py                     | 2 +-
 src/axolotl/integrations/cut_cross_entropy/README.md   | 3 ++-
 src/axolotl/integrations/cut_cross_entropy/__init__.py | 2 +-
 src/axolotl/monkeypatch/multipack.py                   | 1 +
 5 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
index c283092bee..d79c2fb09e 100644
--- a/examples/colab-notebooks/colab-axolotl-example.ipynb
+++ b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -40,7 +40,7 @@
         "%%capture\n",
         "# This step can take ~5-10 minutes to install dependencies\n",
         "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
-        "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169\""
+        "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8\""
       ]
     },
     {
diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py
index cf9ced60c6..195aac2e2c 100644
--- a/scripts/cutcrossentropy_install.py
+++ b/scripts/cutcrossentropy_install.py
@@ -29,5 +29,5 @@
 
 print(
     UNINSTALL_PREFIX
-    + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"'
+    + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"'
 )
diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md
index e0ff14db8f..39b78dd401 100644
--- a/src/axolotl/integrations/cut_cross_entropy/README.md
+++ b/src/axolotl/integrations/cut_cross_entropy/README.md
@@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh
 
 - If you are installing from pip
 ```bash
-pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"
 ```
 
 ## Usage
@@ -31,6 +31,7 @@ plugins:
 
 ## Supported Models
 
+- arcee
 - cohere
 - cohere2
 - gemma
diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py
index 24cd7b6a7d..6f529f10e3 100644
--- a/src/axolotl/integrations/cut_cross_entropy/__init__.py
+++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py
@@ -34,7 +34,7 @@
 
 _CCE_INSTALL_MESSAGE = (
     "Please install Axolotl's fork of cut_cross_entropy with transformers support using "
-    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"`'
+    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"`'
 )
 
 
diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py
index 5fc5ae856b..7df9877d78 100644
--- a/src/axolotl/monkeypatch/multipack.py
+++ b/src/axolotl/monkeypatch/multipack.py
@@ -37,6 +37,7 @@
     "glm4",
     "smollm3",
     "gpt_oss",
+    "arcee",
 ]
 
 

From e1a221c8b3c95e46b03a78828afc3d0235d99cbe Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 11:15:32 +0700
Subject: [PATCH 2/8] feat: add latest models supported by cce

---
 src/axolotl/integrations/cut_cross_entropy/README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md
index 39b78dd401..7924d34720 100644
--- a/src/axolotl/integrations/cut_cross_entropy/README.md
+++ b/src/axolotl/integrations/cut_cross_entropy/README.md
@@ -42,13 +42,17 @@ plugins:
 - gemma3n_text
 - glm
 - glm4
+- gpt_oss
 - granite
 - granitemoe
+- hunyuan_v1_dense
+- hunyuan_v1_moe
 - llama
 - llama4
 - llama4_text
 - mistral
 - mistral3
+- mixtral
 - mllama
 - phi
 - phi3

From 2759ad347a277c25a552b0bcee4a966343271c41 Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 11:28:50 +0700
Subject: [PATCH 3/8] feat: add arcee example config

---
 examples/arcee/README.md           | 53 +++++++++++++++++++++++++
 examples/arcee/afm-4.5b-qlora.yaml | 64 ++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 examples/arcee/README.md
 create mode 100644 examples/arcee/afm-4.5b-qlora.yaml

diff --git a/examples/arcee/README.md b/examples/arcee/README.md
new file mode 100644
index 0000000000..118036b3d9
--- /dev/null
+++ b/examples/arcee/README.md
@@ -0,0 +1,53 @@
+# Finetune ArceeAI's AFM with Axolotl
+
+[Arcee Foundation Models (AFM)]((https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473)) are a family of 4.5B parameter open weight models trained by Arcee.ai.
+
+This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.
+
+Thanks to the team at Arcee.ai for using Axolotl for supervised fine-tuning of the AFM model.
+
+## Getting started
+
+1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). AFM support is currently only available in nightly builds, so you need to either install from main or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html).
+
+    Here is an example of how to install from main for pip:
+
+```bash
+# Ensure you have PyTorch installed (PyTorch 2.6.0 min)
+git clone https://github.com/axolotl-ai-cloud/axolotl.git
+cd axolotl
+
+pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja
+pip3 install --no-build-isolation -e '.[flash-attn]'
+```
+
+2. Run the finetuning example:
+
+```bash
+axolotl train examples/arcee/afm-4.5b-qlora.yaml
+```
+
+This config uses about (---) VRAM.
+
+Let us know how it goes. Happy finetuning! 🚀
+
+### TIPS
+
+- For inference, the official Arcee.ai team recommends `top_p: 0.95`, `temperature: 0.5`, `top_k: 50`, and `repeat_penalty: 1.1`.
+- You can run a full finetuning by removing `adapter: qlora` and `load_in_4bit: true` from the config.
+- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html).
+- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template).
+
+## Optimization Guides
+
+- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
+- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
+- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
+
+## Related Resources
+
+- [AFM Blog](https://docs.arcee.ai/arcee-foundation-models/introduction-to-arcee-foundation-models)
+- [Axolotl Docs](https://docs.axolotl.ai)
+- [Axolotl Website](https://axolotl.ai)
+- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
+- [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml
new file mode 100644
index 0000000000..00fc45c3ca
--- /dev/null
+++ b/examples/arcee/afm-4.5b-qlora.yaml
@@ -0,0 +1,64 @@
+base_model: arcee-ai/AFM-4.5B-Bas
+
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+
+load_in_8bit: false
+load_in_4bit: true
+
+datasets:
+  - path: fozziethebeat/alpaca_messages_2k_test
+    type: chat_template
+
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.1
+output_dir: ./outputs/lora-out
+
+adapter: qlora
+lora_model_dir:
+
+sequence_len: 2048
+sample_packing: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 4
+micro_batch_size: 2
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+bf16: auto
+tf32: false
+
+gradient_checkpointing: true
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 1
+saves_per_epoch: 1
+
+# save_first_step: true  # uncomment this to validate checkpoint saving works with your config

From f3c3538c68fd59bdda650635f1a0c49be4b557c7 Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 11:28:58 +0700
Subject: [PATCH 4/8] chore: lint

---
 examples/magistral/magistral-small-fsdp-qlora.yaml  | 1 -
 examples/magistral/magistral-small-qlora.yaml       | 1 -
 examples/magistral/magistral-small-think-qlora.yaml | 1 -
 3 files changed, 3 deletions(-)

diff --git a/examples/magistral/magistral-small-fsdp-qlora.yaml b/examples/magistral/magistral-small-fsdp-qlora.yaml
index 14a7ee2192..d46c49fe05 100644
--- a/examples/magistral/magistral-small-fsdp-qlora.yaml
+++ b/examples/magistral/magistral-small-fsdp-qlora.yaml
@@ -27,7 +27,6 @@ sequence_len: 2048
 sample_packing: true
 eval_sample_packing: false
 
-
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/magistral/magistral-small-qlora.yaml b/examples/magistral/magistral-small-qlora.yaml
index 5ec2f0fbf5..188924d393 100644
--- a/examples/magistral/magistral-small-qlora.yaml
+++ b/examples/magistral/magistral-small-qlora.yaml
@@ -26,7 +26,6 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 
-
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/magistral/magistral-small-think-qlora.yaml b/examples/magistral/magistral-small-think-qlora.yaml
index 0e8a9c1f7f..b715b31560 100644
--- a/examples/magistral/magistral-small-think-qlora.yaml
+++ b/examples/magistral/magistral-small-think-qlora.yaml
@@ -26,7 +26,6 @@ lora_model_dir:
 sequence_len: 2048
 sample_packing: true
 
-
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05

From 4a26eeca7b3b59cf2647ca95ccb0aa95527252ef Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 12:05:45 +0700
Subject: [PATCH 5/8] fix: typo

---
 examples/arcee/afm-4.5b-qlora.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml
index 00fc45c3ca..f5a0de347b 100644
--- a/examples/arcee/afm-4.5b-qlora.yaml
+++ b/examples/arcee/afm-4.5b-qlora.yaml
@@ -1,4 +1,4 @@
-base_model: arcee-ai/AFM-4.5B-Bas
+base_model: arcee-ai/AFM-4.5B-Base
 
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name

From fec52304a0893c378ff49cd37060823e13b3fc0e Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 12:16:02 +0700
Subject: [PATCH 6/8] feat: change to instruct

---
 examples/arcee/afm-4.5b-qlora.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml
index f5a0de347b..2cb42cacda 100644
--- a/examples/arcee/afm-4.5b-qlora.yaml
+++ b/examples/arcee/afm-4.5b-qlora.yaml
@@ -1,4 +1,4 @@
-base_model: arcee-ai/AFM-4.5B-Base
+base_model: arcee-ai/AFM-4.5B
 
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name

From a2997d9996ebf6528833b017147e0eab80c7d97e Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Thu, 7 Aug 2025 12:51:07 +0700
Subject: [PATCH 7/8] feat: add vram usage

---
 examples/arcee/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/arcee/README.md b/examples/arcee/README.md
index 118036b3d9..cede8e17c0 100644
--- a/examples/arcee/README.md
+++ b/examples/arcee/README.md
@@ -27,7 +27,7 @@ pip3 install --no-build-isolation -e '.[flash-attn]'
 axolotl train examples/arcee/afm-4.5b-qlora.yaml
 ```
 
-This config uses about (---) VRAM.
+This config uses about 7.8GiB VRAM.
 
 Let us know how it goes. Happy finetuning! 🚀
 

From eb0f8608b25828a7e85f521197046790104190f5 Mon Sep 17 00:00:00 2001
From: NanoCode012 <nano@axolotl.ai>
Date: Fri, 8 Aug 2025 10:14:12 +0700
Subject: [PATCH 8/8] Update README.md

---
 examples/arcee/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/arcee/README.md b/examples/arcee/README.md
index cede8e17c0..2178933065 100644
--- a/examples/arcee/README.md
+++ b/examples/arcee/README.md
@@ -1,6 +1,6 @@
 # Finetune ArceeAI's AFM with Axolotl
 
-[Arcee Foundation Models (AFM)]((https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473)) are a family of 4.5B parameter open weight models trained by Arcee.ai.
+[Arcee Foundation Models (AFM)](https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473) are a family of 4.5B parameter open weight models trained by Arcee.ai.
 
 This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.