From 451fe5f0dab94b9147eb8a17ff14458cfed78b32 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 11:14:19 +0700 Subject: [PATCH 1/8] feat: add arcee --- examples/colab-notebooks/colab-axolotl-example.ipynb | 2 +- scripts/cutcrossentropy_install.py | 2 +- src/axolotl/integrations/cut_cross_entropy/README.md | 3 ++- src/axolotl/integrations/cut_cross_entropy/__init__.py | 2 +- src/axolotl/monkeypatch/multipack.py | 1 + 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb index c283092bee..d79c2fb09e 100644 --- a/examples/colab-notebooks/colab-axolotl-example.ipynb +++ b/examples/colab-notebooks/colab-axolotl-example.ipynb @@ -40,7 +40,7 @@ "%%capture\n", "# This step can take ~5-10 minutes to install dependencies\n", "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n", - "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169\"" + "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8\"" ] }, { diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py index cf9ced60c6..195aac2e2c 100644 --- a/scripts/cutcrossentropy_install.py +++ b/scripts/cutcrossentropy_install.py @@ -29,5 +29,5 @@ print( UNINSTALL_PREFIX - + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"' + + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"' ) diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md index e0ff14db8f..39b78dd401 100644 --- a/src/axolotl/integrations/cut_cross_entropy/README.md +++ b/src/axolotl/integrations/cut_cross_entropy/README.md @@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh - If you are installing from pip ```bash -pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169" +pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8" ``` ## Usage @@ -31,6 +31,7 @@ plugins: ## Supported Models +- arcee - cohere - cohere2 - gemma diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py index 24cd7b6a7d..6f529f10e3 100644 --- a/src/axolotl/integrations/cut_cross_entropy/__init__.py +++ b/src/axolotl/integrations/cut_cross_entropy/__init__.py @@ -34,7 +34,7 @@ _CCE_INSTALL_MESSAGE = ( "Please install Axolotl's fork of cut_cross_entropy with transformers support using " - '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@48b5169"`' + '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@bb8d9f8"`' ) diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index 5fc5ae856b..7df9877d78 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -37,6 +37,7 @@ "glm4", "smollm3", "gpt_oss", + "arcee", ] From e1a221c8b3c95e46b03a78828afc3d0235d99cbe Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 11:15:32 +0700 Subject: [PATCH 2/8] feat: add latest models supported by cce --- src/axolotl/integrations/cut_cross_entropy/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md index 39b78dd401..7924d34720 100644 --- a/src/axolotl/integrations/cut_cross_entropy/README.md +++ b/src/axolotl/integrations/cut_cross_entropy/README.md @@ -42,13 +42,17 @@ plugins: - gemma3n_text - glm - glm4 +- gpt_oss - granite - granitemoe +- hunyuan_v1_dense +- hunyuan_v1_moe - llama - llama4 - llama4_text - mistral - mistral3 +- mixtral - mllama - phi - phi3 From 2759ad347a277c25a552b0bcee4a966343271c41 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 11:28:50 +0700 Subject: [PATCH 3/8] feat: add arcee example config --- examples/arcee/README.md | 53 +++++++++++++++++++++++++ examples/arcee/afm-4.5b-qlora.yaml | 64 ++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) create mode 100644 examples/arcee/README.md create mode 100644 examples/arcee/afm-4.5b-qlora.yaml diff --git a/examples/arcee/README.md b/examples/arcee/README.md new file mode 100644 index 0000000000..118036b3d9 --- /dev/null +++ b/examples/arcee/README.md @@ -0,0 +1,53 @@ +# Finetune ArceeAI's AFM with Axolotl + +[Arcee Foundation Models (AFM)]((https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473)) are a family of 4.5B parameter open weight models trained by Arcee.ai. + +This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking. + +Thanks to the team at Arcee.ai for using Axolotl in supervised fine-tuning the AFM model. + +## Getting started + +1. Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html). You need to install from main as AFM is only on nightly or use our latest [Docker images](https://docs.axolotl.ai/docs/docker.html). + + Here is an example of how to install from main for pip: + +```bash +# Ensure you have Pytorch installed (Pytorch 2.6.0 min) +git clone https://github.com/axolotl-ai-cloud/axolotl.git +cd axolotl + +pip3 install packaging==23.2 setuptools==75.8.0 wheel ninja +pip3 install --no-build-isolation -e '.[flash-attn]' +``` + +2. Run the finetuning example: + +```bash +axolotl train examples/arcee/afm-4.5b-qlora.yaml +``` + +This config uses about (---) VRAM. + +Let us know how it goes. Happy finetuning! 🚀 + +### TIPS + +- For inference, the official Arcee.ai team recommends `top_p: 0.95`, `temperature: 0.5`, `top_k: 50`, and `repeat_penalty: 1.1`. +- You can run a full finetuning by removing the `adapter: qlora` and `load_in_4bit: true` from the config. +- Read more on how to load your own dataset at [docs](https://docs.axolotl.ai/docs/dataset_loading.html). +- The dataset format follows the OpenAI Messages format as seen [here](https://docs.axolotl.ai/docs/dataset-formats/conversation.html#chat_template). + +## Optimization Guides + +- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html) +- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html) +- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html) + +## Related Resources + +- [AFM Blog](https://docs.arcee.ai/arcee-foundation-models/introduction-to-arcee-foundation-models) +- [Axolotl Docs](https://docs.axolotl.ai) +- [Axolotl Website](https://axolotl.ai) +- [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl) +- [Axolotl Discord](https://discord.gg/7m9sfhzaf3) diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml new file mode 100644 index 0000000000..00fc45c3ca --- /dev/null +++ b/examples/arcee/afm-4.5b-qlora.yaml @@ -0,0 +1,64 @@ +base_model: arcee-ai/AFM-4.5B-Bas + +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +plugins: + - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/lora-out + +adapter: qlora +lora_model_dir: + +sequence_len: 2048 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - gate_proj + - down_proj + - up_proj + - q_proj + - v_proj + - k_proj + - o_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +# save_first_step: true # uncomment this to validate checkpoint saving works with your config From f3c3538c68fd59bdda650635f1a0c49be4b557c7 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 11:28:58 +0700 Subject: [PATCH 4/8] chore: lint --- examples/magistral/magistral-small-fsdp-qlora.yaml | 1 - examples/magistral/magistral-small-qlora.yaml | 1 - examples/magistral/magistral-small-think-qlora.yaml | 1 - 3 files changed, 3 deletions(-) diff --git a/examples/magistral/magistral-small-fsdp-qlora.yaml b/examples/magistral/magistral-small-fsdp-qlora.yaml index 14a7ee2192..d46c49fe05 100644 --- a/examples/magistral/magistral-small-fsdp-qlora.yaml +++ b/examples/magistral/magistral-small-fsdp-qlora.yaml @@ -27,7 +27,6 @@ sequence_len: 2048 sample_packing: true eval_sample_packing: false - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-qlora.yaml b/examples/magistral/magistral-small-qlora.yaml index 5ec2f0fbf5..188924d393 100644 --- a/examples/magistral/magistral-small-qlora.yaml +++ b/examples/magistral/magistral-small-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 diff --git a/examples/magistral/magistral-small-think-qlora.yaml b/examples/magistral/magistral-small-think-qlora.yaml index 0e8a9c1f7f..b715b31560 100644 --- a/examples/magistral/magistral-small-think-qlora.yaml +++ b/examples/magistral/magistral-small-think-qlora.yaml @@ -26,7 +26,6 @@ lora_model_dir: sequence_len: 2048 sample_packing: true - lora_r: 32 lora_alpha: 16 lora_dropout: 0.05 From 4a26eeca7b3b59cf2647ca95ccb0aa95527252ef Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 12:05:45 +0700 Subject: [PATCH 5/8] fix: typo --- examples/arcee/afm-4.5b-qlora.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml index 00fc45c3ca..f5a0de347b 100644 --- a/examples/arcee/afm-4.5b-qlora.yaml +++ b/examples/arcee/afm-4.5b-qlora.yaml @@ -1,4 +1,4 @@ -base_model: arcee-ai/AFM-4.5B-Bas +base_model: arcee-ai/AFM-4.5B-Base # Automatically upload checkpoint and final model to HF # hub_model_id: username/custom_model_name From fec52304a0893c378ff49cd37060823e13b3fc0e Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 12:16:02 +0700 Subject: [PATCH 6/8] feat: change to instruct --- examples/arcee/afm-4.5b-qlora.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/arcee/afm-4.5b-qlora.yaml b/examples/arcee/afm-4.5b-qlora.yaml index f5a0de347b..2cb42cacda 100644 --- a/examples/arcee/afm-4.5b-qlora.yaml +++ b/examples/arcee/afm-4.5b-qlora.yaml @@ -1,4 +1,4 @@ -base_model: arcee-ai/AFM-4.5B-Base +base_model: arcee-ai/AFM-4.5B # Automatically upload checkpoint and final model to HF # hub_model_id: username/custom_model_name From a2997d9996ebf6528833b017147e0eab80c7d97e Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Thu, 7 Aug 2025 12:51:07 +0700 Subject: [PATCH 7/8] feat: add vram usage --- examples/arcee/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/arcee/README.md b/examples/arcee/README.md index 118036b3d9..cede8e17c0 100644 --- a/examples/arcee/README.md +++ b/examples/arcee/README.md @@ -27,7 +27,7 @@ pip3 install --no-build-isolation -e '.[flash-attn]' axolotl train examples/arcee/afm-4.5b-qlora.yaml ``` -This config uses about (---) VRAM. +This config uses about 7.8GiB VRAM. Let us know how it goes. Happy finetuning! 🚀 From eb0f8608b25828a7e85f521197046790104190f5 Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 8 Aug 2025 10:14:12 +0700 Subject: [PATCH 8/8] Update README.md --- examples/arcee/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/arcee/README.md b/examples/arcee/README.md index cede8e17c0..2178933065 100644 --- a/examples/arcee/README.md +++ b/examples/arcee/README.md @@ -1,6 +1,6 @@ # Finetune ArceeAI's AFM with Axolotl -[Arcee Foundation Models (AFM)]((https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473)) are a family of 4.5B parameter open weight models trained by Arcee.ai. +[Arcee Foundation Models (AFM)](https://huggingface.co/collections/arcee-ai/afm-45b-68823397c351603014963473) are a family of 4.5B parameter open weight models trained by Arcee.ai. This guide shows how to fine-tune it with Axolotl with multi-turn conversations and proper masking.