From 63da6278a989e2b3f221ce73a3437a4b9d6f8864 Mon Sep 17 00:00:00 2001 From: vovanphuc Date: Thu, 8 Jan 2026 10:29:10 +0700 Subject: [PATCH 1/8] [model] add LFM2AudioPlugin for LFM2.5-Audio support --- src/llamafactory/data/mm_plugin.py | 149 +++++++++++++++++++++++++++++ 1 file changed, 149 insertions(+) diff --git a/src/llamafactory/data/mm_plugin.py b/src/llamafactory/data/mm_plugin.py index b0a345e1c2..d47373dc09 100644 --- a/src/llamafactory/data/mm_plugin.py +++ b/src/llamafactory/data/mm_plugin.py @@ -2159,6 +2159,154 @@ def process_messages( return messages +@dataclass +class LFM2AudioPlugin(BasePlugin): + r"""Plugin for LFM2.5-Audio models. + + LFM2.5-Audio Architecture: + - FastConformer audio encoder (16kHz input, 8x subsampling) + - Audio markers: <|audio_start|> ... <|text_start|> + - Uses liquid_audio package for feature extraction (optional) + + Token Structure: + - <|audio_start|> (token 128): Audio region start + - <|text_start|> (token 129): Audio region end / text start + - audio_token: Placeholder token repeated for sequence length + """ + + audio_bos_token: str = "<|audio_start|>" + audio_eos_token: str = "<|text_start|>" + + @override + def _validate_input( + self, + processor: Optional["MMProcessor"], + images: list["ImageInput"], + videos: list["VideoInput"], + audios: list["AudioInput"], + ) -> None: + r"""Validate inputs. Allow audio without standard HF feature_extractor. + + LFM2.5-Audio uses liquid_audio package for audio processing, not standard + HuggingFace feature_extractor. We skip the audio validation here. + """ + # Only validate images/videos, skip audio feature_extractor check + if len(images) != 0 or len(videos) != 0: + super()._validate_input(processor, images, videos, []) + + @override + def _get_mm_inputs( + self, + images: list["ImageInput"], + videos: list["VideoInput"], + audios: list["AudioInput"], + processor: Optional["MMProcessor"], + ) -> dict[str, "torch.Tensor"]: + r"""Extract audio features using liquid_audio or HF processor. + + LFM2.5-Audio uses custom liquid_audio processor, not standard HuggingFace. + This method tries to extract features if a compatible processor is available. 
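+
+        Worked example (hedged; assumes a 10 ms mel hop, which liquid_audio
+        does not guarantee): a 10 s, 16 kHz clip yields ~1000 log-mel frames,
+        so the FastConformer output length is
+
+            seq_len = (1000 - 1) // 8 + 1  # -> 125 positions
+
+        Note that the liquid_audio path derives one seq_len from the feature
+        tensor's last dimension, so batch-padded clips all share that length.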
+ """ + mm_inputs: dict[str, torch.Tensor] = {} + + if len(audios) == 0 or processor is None: + return mm_inputs + + # Try liquid_audio processor first (has audio_processor attribute) + if hasattr(processor, "audio_processor") and processor.audio_processor is not None: + audio_processor = processor.audio_processor + audios_regularized = self._regularize_audios(audios, sampling_rate=16000)["audios"] + # liquid_audio returns log-mel features + features = audio_processor(audios_regularized, sampling_rate=16000) + mm_inputs["audio_features"] = features + # Calculate sequence lengths from feature shapes (8x subsampling in FastConformer) + if hasattr(features, "shape"): + seq_len = (features.shape[-1] - 1) // 8 + 1 + mm_inputs["audio_seq_lengths"] = [seq_len] * len(audios) + # Fallback: standard HF feature_extractor + elif hasattr(processor, "feature_extractor") and processor.feature_extractor is not None: + feature_extractor: SequenceFeatureExtractor = processor.feature_extractor + audios_regularized = self._regularize_audios(audios, sampling_rate=16000)["audios"] + mm_inputs.update( + feature_extractor( + audios_regularized, + sampling_rate=16000, + return_attention_mask=True, + padding="max_length", + return_tensors="pt", + ) + ) + mm_inputs["feature_attention_mask"] = mm_inputs.pop("attention_mask", None) + + return mm_inputs + + @override + def process_messages( + self, + messages: list[dict[str, str]], + images: list["ImageInput"], + videos: list["VideoInput"], + audios: list["AudioInput"], + processor: Optional["MMProcessor"], + ) -> list[dict[str, str]]: + r"""Replace audio placeholders with boundary-wrapped tokens. + + Produces: <|audio_start|>{audio_token * seqlen}<|text_start|> + """ + self._validate_input(processor, images, videos, audios) + self._validate_messages(messages, images, videos, audios) + + num_audio_tokens = 0 + messages = deepcopy(messages) + + # Calculate audio sequence lengths if processor is available + audio_seqlens: list[int] = [] + if self.expand_mm_tokens and processor is not None: + mm_inputs = self._get_mm_inputs([], [], audios, processor) + if "audio_seq_lengths" in mm_inputs: + # liquid_audio path + audio_seqlens = mm_inputs["audio_seq_lengths"] + elif "feature_attention_mask" in mm_inputs and mm_inputs["feature_attention_mask"] is not None: + # HF path - calculate from attention mask (8x subsampling) + input_lengths = mm_inputs["feature_attention_mask"].sum(-1).numpy() + audio_seqlens = [(int(length) - 1) // 8 + 1 for length in input_lengths] + + for message in messages: + content = message["content"] + while AUDIO_PLACEHOLDER in content: + # Get audio sequence length + if self.expand_mm_tokens and num_audio_tokens < len(audio_seqlens): + audio_seqlen = audio_seqlens[num_audio_tokens] + else: + audio_seqlen = 1 # Fallback: single token + + # Build: <|audio_start|>{audio_token * seqlen}<|text_start|> + audio_tokens = self.audio_token * audio_seqlen if self.audio_token else "" + replacement = f"{self.audio_bos_token}{audio_tokens}{self.audio_eos_token}" + + content = content.replace(AUDIO_PLACEHOLDER, replacement, 1) + num_audio_tokens += 1 + + message["content"] = content + + return messages + + @override + def get_mm_inputs( + self, + images: list["ImageInput"], + videos: list["VideoInput"], + audios: list["AudioInput"], + imglens: list[int], + vidlens: list[int], + audlens: list[int], + batch_ids: list[list[int]], + processor: Optional["MMProcessor"], + ) -> dict[str, Union[list[int], "torch.Tensor"]]: + self._validate_input(processor, images, videos, 
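+            # Hedged illustration: the features returned below must line up with
+            # the tokens process_messages expanded earlier, e.g. for seq_len=125:
+            #   <|audio_start|> + audio_token * 125 + <|text_start|>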
audios) + return self._get_mm_inputs(images, videos, audios, processor) + + PLUGINS = { "base": BasePlugin, "ernie_vl": ErnieVLPlugin, @@ -2172,6 +2320,7 @@ def process_messages( "llava_next": LlavaNextPlugin, "llava_next_video": LlavaNextVideoPlugin, "lfm2_vl": LFMVLPlugin, + "lfm2_audio": LFM2AudioPlugin, "minicpm_v": MiniCPMVPlugin, "mllama": MllamaPlugin, "paligemma": PaliGemmaPlugin, From d9c7a61443a50914d10d7006629c8bfaec233784 Mon Sep 17 00:00:00 2001 From: vovanphuc Date: Thu, 8 Jan 2026 10:29:32 +0700 Subject: [PATCH 2/8] [feature] add lfm2_audio template and model registration --- src/llamafactory/data/template.py | 26 ++++++++++++++++++++++++++ src/llamafactory/extras/constants.py | 11 +++++++++++ 2 files changed, 37 insertions(+) diff --git a/src/llamafactory/data/template.py b/src/llamafactory/data/template.py index ef1c5db698..b88124ed04 100644 --- a/src/llamafactory/data/template.py +++ b/src/llamafactory/data/template.py @@ -1371,6 +1371,32 @@ def get_template_and_fix_tokenizer(tokenizer: "PreTrainedTokenizer", data_args: ) +register_template( + name="lfm2_audio", + format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]), + format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]), + format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]), + format_function=FunctionFormatter(slots=["{{content}}<|im_end|>\n"], tool_format="lfm2"), + format_observation=StringFormatter( + slots=[ + "<|im_start|>tool\n<|tool_response_start|>{{content}}<|tool_response_end|><|im_end|>\n" + "<|im_start|>assistant\n" + ] + ), + format_tools=ToolFormatter(tool_format="lfm2"), + default_system="You are a helpful audio assistant by Liquid AI.", + stop_words=["<|im_end|>"], + tool_call_words=("<|tool_call_start|>", "<|tool_call_end|>"), + replace_eos=True, + mm_plugin=get_mm_plugin( + name="lfm2_audio", + audio_token="<|reserved_1|>", # Token ID 17 - placeholder between markers + audio_bos_token="<|audio_start|>", # Token ID 128 + audio_eos_token="<|text_start|>", # Token ID 129 + ), +) + + register_template( name="llama2", format_user=StringFormatter(slots=[{"bos_token"}, "[INST] {{content}} [/INST]"]), diff --git a/src/llamafactory/extras/constants.py b/src/llamafactory/extras/constants.py index 0b4d35ef18..9648f0f253 100644 --- a/src/llamafactory/extras/constants.py +++ b/src/llamafactory/extras/constants.py @@ -1517,6 +1517,17 @@ def register_model_group( ) +register_model_group( + models={ + "LFM2.5-Audio-1.5B": { + DownloadSource.DEFAULT: "LiquidAI/LFM2.5-Audio-1.5B", + }, + }, + template="lfm2_audio", + multimodal=True, +) + + register_model_group( models={ "Llama-7B": { From 130996f81274e26b5f4e19a8881a2bcc3028c54b Mon Sep 17 00:00:00 2001 From: vovanphuc Date: Thu, 8 Jan 2026 10:29:42 +0700 Subject: [PATCH 3/8] [model] add LFM2.5-Audio model loader with liquid_audio integration --- src/llamafactory/extras/packages.py | 4 + src/llamafactory/model/loader.py | 13 + .../model/model_utils/lfm2_audio.py | 347 ++++++++++++++++++ 3 files changed, 364 insertions(+) create mode 100644 src/llamafactory/model/model_utils/lfm2_audio.py diff --git a/src/llamafactory/extras/packages.py b/src/llamafactory/extras/packages.py index c6328a7b02..d259ca0461 100644 --- a/src/llamafactory/extras/packages.py +++ b/src/llamafactory/extras/packages.py @@ -122,3 +122,7 @@ def is_uvicorn_available(): def is_vllm_available(): return _is_package_available("vllm") + + +def is_liquid_audio_available(): + return 
_is_package_available("liquid_audio") diff --git a/src/llamafactory/model/loader.py b/src/llamafactory/model/loader.py index 8c24622381..44ca540639 100644 --- a/src/llamafactory/model/loader.py +++ b/src/llamafactory/model/loader.py @@ -33,6 +33,7 @@ from ..extras.packages import is_torch_version_greater_than from .adapter import init_adapter from .model_utils.ktransformers import load_kt_pretrained_model +from .model_utils.lfm2_audio import is_lfm2_audio_model, load_lfm2_audio_pretrained_model from .model_utils.liger_kernel import apply_liger_kernel from .model_utils.misc import register_autoclass from .model_utils.mod import convert_pretrained_model_to_mod, load_mod_pretrained_model @@ -127,6 +128,14 @@ def load_tokenizer(model_args: "ModelArguments") -> "TokenizerModule": def load_config(model_args: "ModelArguments") -> "PretrainedConfig": r"""Load model config.""" init_kwargs = _get_init_kwargs(model_args) + + # Special handling for LFM2.5-Audio models + if is_lfm2_audio_model(model_args.model_name_or_path): + from .model_utils.lfm2_audio import LFM2AudioConfig + + logger.info_rank0("Detected LFM2.5-Audio model, using custom config loader.") + return LFM2AudioConfig() + return AutoConfig.from_pretrained(model_args.model_name_or_path, **init_kwargs) @@ -155,6 +164,10 @@ def load_model( lazy_load = True elif is_trainable: model = load_unsloth_pretrained_model(config, model_args, finetuning_args) + elif is_lfm2_audio_model(model_args.model_name_or_path): + # Load LFM2.5-Audio model using liquid_audio package + logger.info_rank0("Loading LFM2.5-Audio model with liquid_audio package...") + model = load_lfm2_audio_pretrained_model(model_args, **init_kwargs) if model is None and not lazy_load: init_kwargs["config"] = config diff --git a/src/llamafactory/model/model_utils/lfm2_audio.py b/src/llamafactory/model/model_utils/lfm2_audio.py new file mode 100644 index 0000000000..04d5d7d89a --- /dev/null +++ b/src/llamafactory/model/model_utils/lfm2_audio.py @@ -0,0 +1,347 @@ +# Copyright 2025 the LlamaFactory team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Custom model loader for LFM2.5-Audio models using liquid_audio package. + +LFM2.5-Audio models use a custom architecture that requires the liquid_audio package +for proper model loading and audio processing. 
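+
+A minimal usage sketch, using only names defined in this module (the loader in
+loader.py wires this up automatically; shown here for illustration):
+
+    from llamafactory.model.model_utils.lfm2_audio import (
+        is_lfm2_audio_model,
+        load_lfm2_audio_pretrained_model,
+    )
+
+    if is_lfm2_audio_model(model_args.model_name_or_path):
+        model = load_lfm2_audio_pretrained_model(model_args, torch_dtype="auto")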
+"""
+
+from typing import TYPE_CHECKING, Any, Optional
+
+import torch
+from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
+from transformers.generation import GenerationMixin
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from ...extras import logging
+from ...extras.packages import is_liquid_audio_available
+
+
+if TYPE_CHECKING:
+    from ...hparams import ModelArguments
+
+
+logger = logging.get_logger(__name__)
+
+
+class LFM2AudioConfig(PretrainedConfig):
+    """Config class for LFM2.5-Audio models to enable HuggingFace compatibility."""
+
+    model_type = "lfm2_audio"
+
+    def __init__(
+        self,
+        vocab_size: int = 65536,
+        hidden_size: int = 2048,
+        num_hidden_layers: int = 16,
+        num_attention_heads: int = 32,
+        num_key_value_heads: int = 8,
+        codebooks: int = 8,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.codebooks = codebooks
+        super().__init__(**kwargs)
+
+
+class LFM2AudioModelForCausalLM(PreTrainedModel, GenerationMixin):
+    """HuggingFace-compatible wrapper for LFM2AudioModel from liquid_audio.
+
+    This wrapper enables LFM2.5-Audio models to be used with LLaMA-Factory's
+    training pipeline while leveraging the liquid_audio package for model loading.
+    """
+
+    config_class = LFM2AudioConfig
+    base_model_prefix = "model"
+    supports_gradient_checkpointing = True
+    _no_split_modules = ["Lfm2DecoderLayer", "ConformerBlock"]
+    main_input_name = "input_ids"
+    _supports_cache_class = True
+
+    def __init__(self, config: LFM2AudioConfig):
+        super().__init__(config)
+        self._liquid_model = None
+        self._is_loaded = False
+        # Initialize generation_config for HuggingFace compatibility.
+        # PretrainedConfig always defines eos/pad_token_id (default None), so test
+        # the value instead of hasattr(); otherwise the fallback ids never apply.
+        self.generation_config = GenerationConfig(
+            eos_token_id=config.eos_token_id if config.eos_token_id is not None else 7,
+            pad_token_id=config.pad_token_id if config.pad_token_id is not None else 0,
+        )
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        pretrained_model_name_or_path: str,
+        *model_args,
+        config: Optional[LFM2AudioConfig] = None,
+        torch_dtype: Optional[torch.dtype] = None,
+        device_map: Optional[str] = None,
+        **kwargs,
+    ) -> "LFM2AudioModelForCausalLM":
+        """Load LFM2.5-Audio model using liquid_audio package."""
+        if not is_liquid_audio_available():
+            raise ImportError(
+                "liquid-audio package is required for LFM2.5-Audio models. 
" + "Please install it with: pip install liquid-audio" + ) + + from liquid_audio import LFM2AudioModel + + # Determine dtype + if torch_dtype is None or torch_dtype == "auto": + torch_dtype = torch.bfloat16 + + # Determine device - liquid_audio expects string or torch.device, not dict + if device_map is None or device_map == "auto": + device = "cuda" if torch.cuda.is_available() else "cpu" + elif isinstance(device_map, dict): + # Handle dict device_map like {'': device(type='cuda', index=0)} + if "" in device_map: + dev = device_map[""] + device = str(dev) if hasattr(dev, "type") else "cuda" + else: + device = "cuda" if torch.cuda.is_available() else "cpu" + elif isinstance(device_map, str): + device = device_map + else: + device = "cuda" if torch.cuda.is_available() else "cpu" + + logger.info_rank0(f"Loading LFM2.5-Audio model from {pretrained_model_name_or_path}") + logger.info_rank0(f"Using dtype={torch_dtype}, device={device}") + + # Load using liquid_audio + liquid_model = LFM2AudioModel.from_pretrained( + pretrained_model_name_or_path, + dtype=torch_dtype, + device=device, + revision=kwargs.get("revision"), + ) + + # Create config from liquid model + lfm_config = liquid_model.conf.lfm + if config is None: + config = LFM2AudioConfig( + vocab_size=lfm_config.vocab_size, + hidden_size=lfm_config.hidden_size, + num_hidden_layers=lfm_config.num_hidden_layers, + num_attention_heads=lfm_config.num_attention_heads, + num_key_value_heads=lfm_config.num_key_value_heads, + codebooks=liquid_model.conf.codebooks, + torch_dtype=torch_dtype, + ) + + # Create wrapper instance + wrapper = cls(config) + wrapper._liquid_model = liquid_model + wrapper._is_loaded = True + + return wrapper + + @property + def model(self): + """Return the underlying liquid_audio model.""" + return self._liquid_model + + @property + def lfm(self): + """Return the LFM2 backbone (HuggingFace Lfm2Model).""" + if self._liquid_model is not None: + return self._liquid_model.lfm + return None + + def get_input_embeddings(self): + """Get text embeddings from the LFM backbone.""" + if self.lfm is not None: + return self.lfm.embed_tokens + return None + + def set_input_embeddings(self, value): + """Set text embeddings in the LFM backbone.""" + if self.lfm is not None: + self.lfm.embed_tokens = value + + def get_output_embeddings(self): + """LFM2 uses tied embeddings, return the same as input.""" + return self.get_input_embeddings() + + def set_output_embeddings(self, new_embeddings): + """Set output embeddings (tied with input).""" + self.set_input_embeddings(new_embeddings) + + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Any] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + output_attentions: Optional[bool] = None, + output_hidden_states: Optional[bool] = None, + return_dict: Optional[bool] = None, + **kwargs, + ) -> CausalLMOutputWithPast: + """Forward pass for training. + + For training, we use the LFM backbone directly for text-only forward pass. + The audio processing is handled by the mm_plugin during data preprocessing. + """ + return_dict = return_dict if return_dict is not None else self.config.use_return_dict + + if self._liquid_model is None: + raise RuntimeError("Model not loaded. 
Call from_pretrained first.") + + # Use the LFM backbone for forward pass + lfm = self._liquid_model.lfm + + # Get embeddings + if inputs_embeds is None and input_ids is not None: + inputs_embeds = lfm.embed_tokens(input_ids) + + # Forward through LFM backbone + outputs = lfm( + inputs_embeds=inputs_embeds, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + use_cache=use_cache, + output_attentions=output_attentions, + output_hidden_states=output_hidden_states, + return_dict=True, + ) + + hidden_states = outputs.last_hidden_state + + # Compute logits using tied embeddings + logits = torch.nn.functional.linear(hidden_states, lfm.embed_tokens.weight) + + loss = None + if labels is not None: + # Shift for next token prediction + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + + loss_fct = torch.nn.CrossEntropyLoss() + shift_logits = shift_logits.view(-1, self.config.vocab_size) + shift_labels = shift_labels.view(-1) + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + if not return_dict: + output = (logits,) + outputs[1:] + return (loss,) + output if loss is not None else output + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + def prepare_inputs_for_generation( + self, + input_ids: torch.LongTensor, + past_key_values: Optional[Any] = None, + attention_mask: Optional[torch.Tensor] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + **kwargs, + ) -> dict[str, Any]: + """Prepare inputs for generation.""" + if past_key_values is not None: + input_ids = input_ids[:, -1:] + + model_inputs = { + "input_ids": input_ids, + "past_key_values": past_key_values, + "use_cache": kwargs.get("use_cache"), + "attention_mask": attention_mask, + } + + if inputs_embeds is not None and past_key_values is None: + model_inputs["inputs_embeds"] = inputs_embeds + model_inputs["input_ids"] = None + + return model_inputs + + def _reorder_cache(self, past_key_values, beam_idx): + """Reorder cache for beam search.""" + if self.lfm is not None and hasattr(self.lfm, "_reorder_cache"): + return self.lfm._reorder_cache(past_key_values, beam_idx) + return past_key_values + + @classmethod + def can_generate(cls) -> bool: + """Return True to indicate this model can generate sequences.""" + return True + + @property + def device(self) -> torch.device: + """Return the device of the model.""" + if self.lfm is not None: + return next(self.lfm.parameters()).device + return torch.device("cpu") + + @property + def dtype(self) -> torch.dtype: + """Return the dtype of the model.""" + if self.lfm is not None: + return next(self.lfm.parameters()).dtype + return torch.float32 + + +def is_lfm2_audio_model(model_name_or_path: str) -> bool: + """Check if the model is an LFM2.5-Audio model.""" + lfm2_audio_patterns = [ + "LFM2.5-Audio", + "lfm2.5-audio", + "lfm2-audio", + "LFM2-Audio", + ] + return any(pattern.lower() in model_name_or_path.lower() for pattern in lfm2_audio_patterns) + + +def load_lfm2_audio_pretrained_model( + model_args: "ModelArguments", + **kwargs, +) -> "LFM2AudioModelForCausalLM": + """Load LFM2.5-Audio model using liquid_audio package. + + Args: + model_args: Model arguments containing model path and configuration. + **kwargs: Additional arguments passed to from_pretrained. 
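+            Note: only torch_dtype, device_map and revision reach the
+            liquid_audio loader; cache_dir, token and trust_remote_code are
+            currently accepted but unused.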
+ + Returns: + LFM2AudioModelForCausalLM: Loaded model wrapper. + """ + if not is_liquid_audio_available(): + raise ImportError( + "LFM2.5-Audio models require the liquid-audio package. Please install it with: pip install liquid-audio" + ) + + return LFM2AudioModelForCausalLM.from_pretrained( + model_args.model_name_or_path, + torch_dtype=kwargs.get("torch_dtype", "auto"), + device_map=kwargs.get("device_map"), + revision=model_args.model_revision, + cache_dir=model_args.cache_dir, + token=model_args.hf_hub_token, + trust_remote_code=model_args.trust_remote_code, + ) From 2e2062caa4e91a5658ad39a48bedd9fddbd18928 Mon Sep 17 00:00:00 2001 From: vovanphuc Date: Thu, 8 Jan 2026 10:29:52 +0700 Subject: [PATCH 4/8] [test] add LFM2.5-Audio plugin tests --- tests/data/test_mm_plugin.py | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/data/test_mm_plugin.py b/tests/data/test_mm_plugin.py index 3187004aa5..18a6810b3c 100644 --- a/tests/data/test_mm_plugin.py +++ b/tests/data/test_mm_plugin.py @@ -431,3 +431,44 @@ def test_lfm2_vl_plugin(): assert lfm2_vl_plugin.video_token is None assert lfm2_vl_plugin.audio_token is None assert lfm2_vl_plugin.__class__.__name__ == "LFMVLPlugin" + + +@pytest.mark.runs_on(["cpu", "mps"]) +def test_lfm2_audio_plugin(): + """Test LFM2.5-Audio plugin instantiation.""" + # Test plugin can be instantiated with correct tokens + lfm2_audio_plugin = get_mm_plugin( + name="lfm2_audio", + audio_token="<|reserved_1|>", + audio_bos_token="<|audio_start|>", + audio_eos_token="<|text_start|>", + ) + assert lfm2_audio_plugin is not None + assert lfm2_audio_plugin.audio_token == "<|reserved_1|>" + assert lfm2_audio_plugin.audio_bos_token == "<|audio_start|>" + assert lfm2_audio_plugin.audio_eos_token == "<|text_start|>" + assert lfm2_audio_plugin.image_token is None + assert lfm2_audio_plugin.video_token is None + assert lfm2_audio_plugin.__class__.__name__ == "LFM2AudioPlugin" + + +@pytest.mark.runs_on(["cpu", "mps"]) +def test_lfm2_audio_plugin_process_messages(): + """Test LFM2.5-Audio placeholder replacement.""" + lfm2_audio_plugin = get_mm_plugin( + name="lfm2_audio", + audio_token="<|reserved_1|>", + audio_bos_token="<|audio_start|>", + audio_eos_token="<|text_start|>", + ) + messages = [{"content": "Transcribe this: