From 82be91f1584e1c9e19ed476ac98454a9262539ba Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 11 Jul 2025 10:09:42 -0400
Subject: [PATCH 1/5] LFM2 support

---
 examples/lfm2/README.md          |  0
 examples/lfm2/lfm2-350m-fft.yaml | 46 ++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 examples/lfm2/README.md
 create mode 100644 examples/lfm2/lfm2-350m-fft.yaml

diff --git a/examples/lfm2/README.md b/examples/lfm2/README.md
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
new file mode 100644
index 0000000000..ab97f5e2af
--- /dev/null
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -0,0 +1,46 @@
+base_model: LiquidAI/LFM2-350M
+
+chat_template: tokenizer_default
+eot_tokens:
+  - "<|im_end|>"
+datasets:
+  - path: mlabonne/FineTome-100k
+    type: chat_template
+    split: train[:20%]
+    field_messages: conversations
+    message_field_role: from
+    message_field_content: value
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.05
+output_dir: ./outputs/out
+
+sequence_len: 4096
+sample_packing: false # not currently supported yet due to conv1d not supporting varlen
+pad_to_sequence_len: false
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 5e-5
+
+bf16: true
+tf32: true
+
+gradient_checkpointing: false
+resume_from_checkpoint:
+logging_steps: 1
+flash_attention: true
+
+warmup_ratio: 0.1
+evals_per_epoch: 2
+saves_per_epoch: 1
+
+weight_decay: 0.0

From 9ea9a7da8c6e0e252dd5182f760957fb024f384d Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 11 Jul 2025 10:23:34 -0400
Subject: [PATCH 2/5] docs

---
 examples/lfm2/README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/examples/lfm2/README.md b/examples/lfm2/README.md
index e69de29bb2..4d2022496b 100644
--- a/examples/lfm2/README.md
+++ b/examples/lfm2/README.md
@@ -0,0 +1,7 @@
+# Liquid Foundation Models 2
+
+LFM2 support in transformers exists in the main branch, but is not yet included in the transformers release.
+
+```bash
+pip install git+https://github.com/huggingface/transformers.git
+```

From ec848acbf70a7277c3326c8bec4d4ade26e430ff Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 11 Jul 2025 12:44:11 -0400
Subject: [PATCH 3/5] packing seems to work

---
 examples/lfm2/lfm2-350m-fft.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
index ab97f5e2af..fc5b7f60b3 100644
--- a/examples/lfm2/lfm2-350m-fft.yaml
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -15,8 +15,8 @@ val_set_size: 0.05
 output_dir: ./outputs/out
 
 sequence_len: 4096
-sample_packing: false # not currently supported yet due to conv1d not supporting varlen
-pad_to_sequence_len: false
+sample_packing: true
+pad_to_sequence_len: true
 
 wandb_project:
 wandb_entity:

From 310caf23bc53182398492a53dbe050eb8998fb4b Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 11 Jul 2025 13:38:40 -0400
Subject: [PATCH 4/5] update install to force install in case already on dev
 version

---
 examples/lfm2/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/lfm2/README.md b/examples/lfm2/README.md
index 4d2022496b..eb9ca911f9 100644
--- a/examples/lfm2/README.md
+++ b/examples/lfm2/README.md
@@ -3,5 +3,5 @@
 LFM2 support in transformers exists in the main branch, but is not yet included in the transformers release.
 
 ```bash
-pip install git+https://github.com/huggingface/transformers.git
+pip install --upgrade --no-deps --force-reinstall git+https://github.com/huggingface/transformers.git
 ```

From 7c6c10efe44f90cda2e2bfa6747f18dada04a991 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 12 Jul 2025 00:06:26 -0400
Subject: [PATCH 5/5] default to use chunked cross entropy

---
 examples/lfm2/lfm2-350m-fft.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/lfm2/lfm2-350m-fft.yaml b/examples/lfm2/lfm2-350m-fft.yaml
index fc5b7f60b3..95961557e3 100644
--- a/examples/lfm2/lfm2-350m-fft.yaml
+++ b/examples/lfm2/lfm2-350m-fft.yaml
@@ -1,5 +1,7 @@
 base_model: LiquidAI/LFM2-350M
 
+chunked_cross_entropy: true
+
 chat_template: tokenizer_default
 eot_tokens:
   - "<|im_end|>"