axolotl-ai-cloud · winglian · Oct 9, 2025 · Oct 9, 2025 · Oct 9, 2025 · coderabbitai
diff --git a/examples/LiquidAI/README.md b/examples/LiquidAI/README.md
@@ -6,6 +6,8 @@ LFM2 features a new hybrid Liquid architecture with multiplicative gates, short-
 
 This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
 
+Thanks to the team at LiquidAI for giving us early access to prepare for these releases.
+
 ## Getting Started
 
 1.  Install Axolotl following the [installation guide](https://docs.axolotl.ai/docs/installation.html).
@@ -31,6 +33,14 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
     axolotl train examples/LiquidAI/lfm2-vl-lora.yaml
     ```
 
+    **LFM2-MoE**
+    ```bash
+    pip install git+https://github.com/huggingface/transformers.git@0c9a72e4576fe4c84077f066e585129c97bfd4e6
+
+    # LoRA SFT (1x48GB @ 16.2GiB)
+    axolotl train examples/LiquidAI/lfm2-8b-a1b-lora.yaml
+    ```
+
 ### TIPS
 
 - **Installation Error**: If you encounter `ImportError: ... undefined symbol ...` or `ModuleNotFoundError: No module named 'causal_conv1d_cuda'`, the `causal-conv1d` package may have been installed incorrectly. Try uninstalling it:
@@ -45,14 +55,13 @@ This guide shows how to fine-tune both the LFM2 and LFM2-VL models with Axolotl.
 
 ## Optimization Guides
 
-- [Multi-GPU Training](https://docs.axolotl.ai/docs/multi-gpu.html)
-- [LoRA Optimizations](https://docs.axolotl.ai/docs/lora_optims.html)
-- [Multi-Node Training](https://docs.axolotl.ai/docs/multi-node.html)
+- [Optimizations Guide](https://docs.axolotl.ai/docs/optimizations.html)
 
 ## Related Resources
 
 - [LFM2 Blog](https://www.liquid.ai/blog/liquid-foundation-models-v2-our-second-series-of-generative-ai-models)
 - [LFM2-VL Blog](https://www.liquid.ai/blog/lfm2-vl-efficient-vision-language-models)
+- [LFM2-MoE Blog](https://www.liquid.ai/blog/lfm2-8b-a1b-an-efficient-on-device-mixture-of-experts)
 - [Axolotl Docs](https://docs.axolotl.ai)
 - [Axolotl GitHub](https://github.com/axolotl-ai-cloud/axolotl)
 - [Axolotl Discord](https://discord.gg/7m9sfhzaf3)
diff --git a/examples/LiquidAI/lfm2-350m-fft.yaml b/examples/LiquidAI/lfm2-350m-fft.yaml
@@ -1,6 +1,7 @@
 base_model: LiquidAI/LFM2-350M
 
-chunked_cross_entropy: true
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 
 eot_tokens:
   - "<|im_end|>"

diff --git a/examples/LiquidAI/lfm2-8b-a1b-lora.yaml b/examples/LiquidAI/lfm2-8b-a1b-lora.yaml
@@ -0,0 +1,59 @@
+base_model: LiquidAI/LFM2-8B-A1B
+
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+
+load_in_8bit: true
+
+eot_tokens:
+  - "<|im_end|>"
+datasets:
+  - path: mlabonne/FineTome-100k
+    type: chat_template
+    split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.05
+output_dir: ./outputs/out
+
+sequence_len: 4096
+sample_packing: true
+
+adapter: lora
+lora_model_dir:
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules: 'model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 5e-5
+
+bf16: true
+tf32: true
+
+gradient_checkpointing: true
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+
+weight_decay: 0.0
+
+# save_first_step: true  # uncomment this to validate checkpoint saving works with your config
diff --git a/examples/LiquidAI/lfm2-vl-lora.yaml b/examples/LiquidAI/lfm2-vl-lora.yaml
@@ -3,6 +3,9 @@ trust_remote_code: true
 model_type: AutoModelForImageTextToText
 processor_type: AutoProcessor
 
+plugins:
+  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
+
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
 remove_unused_columns: false

diff --git a/examples/colab-notebooks/colab-axolotl-example.ipynb b/examples/colab-notebooks/colab-axolotl-example.ipynb
@@ -40,7 +40,7 @@
     "%%capture\n",
     "# This step can take ~5-10 minutes to install dependencies\n",
     "!pip install --no-build-isolation axolotl[flash-attn]>=0.9.1\n",
-    "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28\""
+    "!pip install \"cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308\""
    ]
   },
   {

diff --git a/scripts/cutcrossentropy_install.py b/scripts/cutcrossentropy_install.py
@@ -29,5 +29,5 @@
 
 print(
     UNINSTALL_PREFIX
-    + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"'
+    + f'{UV_PREFIX}pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308"'
 )
diff --git a/src/axolotl/common/architectures.py b/src/axolotl/common/architectures.py
@@ -14,4 +14,5 @@
     "qwen3_moe": "Qwen3MoeSparseMoeBlock",
     "deepseek_v2": "DeepseekV2MoE",
     "gpt_oss": "GptOssDecoderLayer",
+    "lfm2_moe": "Lfm2MoeSparseMoeBlock",
 }
diff --git a/src/axolotl/integrations/cut_cross_entropy/README.md b/src/axolotl/integrations/cut_cross_entropy/README.md
@@ -19,7 +19,7 @@ python scripts/cutcrossentropy_install.py | sh
 
 - If you are installing from pip
 ```bash
-pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"
+pip3 uninstall -y cut-cross-entropy && pip3 install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308"
 ```
 
 ## Usage
@@ -54,9 +54,13 @@ plugins:
 - granitemoehybrid
 - hunyuan_v1_dense
 - hunyuan_v1_moe
+- lfm2
+- lfm2_moe
+- lfm2_vl
 - llama
 - llama4
 - llama4_text
+- llava
 - mistral
 - mistral3
 - mixtral

diff --git a/src/axolotl/integrations/cut_cross_entropy/__init__.py b/src/axolotl/integrations/cut_cross_entropy/__init__.py
@@ -35,7 +35,7 @@
 
 _CCE_INSTALL_MESSAGE = (
     "Please install Axolotl's fork of cut_cross_entropy with transformers support using "
-    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@147ea28"`'
+    '`pip install "cut-cross-entropy[transformers] @ git+https://github.com/axolotl-ai-cloud/ml-cross-entropy.git@49f3308"`'
 )
 
 

diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py
@@ -45,6 +45,8 @@
     "gpt_oss",
     "arcee",
     "seed_oss",
+    "lfm2",
+    "lfm2_moe",
 ]