36 commits (changes from all commits)
3b53b5f  model : add LFM2-ColBert-350M (tdakhran, Jan 4, 2026)
af74ec1  Use n_cls_out for pooling rank (tdakhran, Jan 5, 2026)
888dd47  memory : add llama_memory_hybrid_iswa (tdakhran, Jan 4, 2026)
40e7a15  Add istft audio utils (tdakhran, Jan 5, 2026)
4017949  model : add Lfm25AudioTokenizer (tdakhran, Jan 5, 2026)
8784689  LFM2.5-Audio-1.5B (tdakhran, Jan 5, 2026)
e1a8fd1  Small fixes (tdakhran, Jan 6, 2026)
ec2890d  Remove pimpl from mtmd-audio (tdakhran, Jan 6, 2026)
b48b93e  mtmd: mtmd_audio_streaming_istft (tdakhran, Jan 6, 2026)
07bf242  Merge remote-tracking branch 'tdakhran/tarek/dev/istft-upstream' into… (tdakhran, Jan 6, 2026)
b0d4293  Read n_layer from gguf [no ci] (tdakhran, Jan 7, 2026)
4a2f68a  Move save_wav implementation to mtmd [no ci] (tdakhran, Jan 7, 2026)
9adc060  Move implementation to mtmd-audio, tts works (tdakhran, Jan 14, 2026)
5e81b61  Support interleaved mode (tdakhran, Jan 14, 2026)
b7ab09f  Fix examples and chat mode (tdakhran, Jan 14, 2026)
88c90a7  memory : add llama_memory_hybrid_iswa (tdakhran, Jan 4, 2026)
99e3092  Update src/llama-memory-hybrid-iswa.cpp (tdakhran, Jan 16, 2026)
a3e8f98  Merge branch 'tarek/feat/memory-hybrid-iswa' into tarek/feat/os-lfm2.… (tdakhran, Jan 16, 2026)
fc3525a  mtmd : Fix ASR for LFM2.5-Audio-1.5B (tdakhran, Jan 16, 2026)
6706849  reset decoder for each chat (tdakhran, Jan 16, 2026)
b5848e1  Move temperature selection to decoder (tdakhran, Jan 16, 2026)
6bb696d  audio output works with llama-server (tdakhran, Jan 16, 2026)
a147d82  chat history works (tdakhran, Jan 16, 2026)
3391aa8  small fixes and refactors (tdakhran, Jan 16, 2026)
0c3c1d0  memory : add llama_memory_hybrid_iswa (tdakhran, Jan 4, 2026)
d6a45a4  Update src/llama-memory-hybrid-iswa.cpp (tdakhran, Jan 16, 2026)
660d88e  Update server-context.cpp (tdakhran, Jan 21, 2026)
d64c8d4  Merge remote-tracking branch 'tdakhran/tarek/feat/memory-hybrid-iswa'… (tdakhran, Jan 21, 2026)
9960b91  Fix cli [no ci] (tdakhran, Jan 21, 2026)
f270033  Merge remote-tracking branch 'upstream/master' into tarek/feat/os-lfm… (tdakhran, Jan 21, 2026)
982a33e  Do not output special tokens (tdakhran, Jan 21, 2026)
c71f178  Read n_layer from gguf [no ci] (tdakhran, Jan 23, 2026)
e30495d  Send reset_context for compatibility with llama-liquid-audio-server (tdakhran, Jan 29, 2026)
d763051  Rework llama-liquid-audio-server to have single queue (tdakhran, Feb 10, 2026)
ac98513  Single thread for server to avoid arena growth (tdakhran, Feb 13, 2026)
39ff210  Make vocoder and audiotokenizer optional [no ci] (tdakhran, Feb 13, 2026)
common/arg.cpp (15 changes: 8 additions & 7 deletions)
@@ -57,6 +57,7 @@ static std::initializer_list<enum llama_example> mmproj_examples = {
     LLAMA_EXAMPLE_MTMD,
     LLAMA_EXAMPLE_SERVER,
     LLAMA_EXAMPLE_CLI,
+    LLAMA_EXAMPLE_LIQUID_AUDIO,
 };
 
 static std::string read_file(const std::string & fname) {
@@ -1339,7 +1340,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.system_prompt = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_DIFFUSION, LLAMA_EXAMPLE_MTMD}));
+    ).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_DIFFUSION, LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_LIQUID_AUDIO}));
     add_opt(common_arg(
         {"--perf"},
         {"--no-perf"},
@@ -2159,7 +2160,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.image.emplace_back(item);
             }
         }
-    ).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_CLI}));
+    ).set_examples({LLAMA_EXAMPLE_MTMD, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_LIQUID_AUDIO}));
     add_opt(common_arg(
         {"--image-min-tokens"}, "N",
         "minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)",
@@ -2639,7 +2640,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.out_file = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE}));
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA, LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_FINETUNE, LLAMA_EXAMPLE_LIQUID_AUDIO}));
     add_opt(common_arg(
         {"-ofreq", "--output-frequency"}, "N",
         string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq),
@@ -2771,14 +2772,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.hostname = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_HOST"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LIQUID_AUDIO}).set_env("LLAMA_ARG_HOST"));
     add_opt(common_arg(
         {"--port"}, "PORT",
         string_format("port to listen (default: %d)", params.port),
         [](common_params & params, int value) {
             params.port = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT"));
+    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LIQUID_AUDIO}).set_env("LLAMA_ARG_PORT"));
     add_opt(common_arg(
         {"--path"}, "PATH",
         string_format("path to serve static files from (default: %s)", params.public_path.c_str()),
@@ -3425,7 +3426,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.vocoder.model.path = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LIQUID_AUDIO}));
     add_opt(common_arg(
         {"--tts-use-guide-tokens"},
         "Use guide tokens to improve TTS word recall",
@@ -3439,7 +3440,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.vocoder.speaker_file = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_TTS}));
+    ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_LIQUID_AUDIO}));
 
     add_opt(common_arg(
         {"--diffusion-steps"}, "N",
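Taken together, the common/arg.cpp hunks register several existing options (--system-prompt, --image, the output file, --host, --port, the vocoder model, and the speaker file) with the new LLAMA_EXAMPLE_LIQUID_AUDIO example. A minimal sketch of the gating pattern behind set_examples (a Python toy, not the actual C++ implementation; the flags and example sets below are illustrative):

from dataclasses import dataclass

# Toy model of set_examples() gating: each option stores the set of
# examples it applies to, and the parser built for a given tool keeps
# only the options whose set contains that tool's example.
@dataclass(frozen=True)
class Option:
    flag: str
    examples: frozenset[str]

OPTIONS = [
    Option("--host",  frozenset({"SERVER", "LIQUID_AUDIO"})),
    Option("--image", frozenset({"MTMD", "CLI", "LIQUID_AUDIO"})),
    Option("--lora",  frozenset({"CLI"})),
]

def options_for(example: str) -> list[str]:
    return [o.flag for o in OPTIONS if example in o.examples]

print(options_for("LIQUID_AUDIO"))  # ['--host', '--image']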
common/common.h (1 change: 1 addition & 0 deletions)
@@ -104,6 +104,7 @@ enum llama_example {
     LLAMA_EXAMPLE_DIFFUSION,
     LLAMA_EXAMPLE_FINETUNE,
     LLAMA_EXAMPLE_FIT_PARAMS,
+    LLAMA_EXAMPLE_LIQUID_AUDIO,
 
     LLAMA_EXAMPLE_COUNT,
 };
convert_hf_to_gguf.py (21 changes: 20 additions & 1 deletion)
@@ -10296,7 +10296,7 @@ def _add_feed_forward_length(self):
     def set_gguf_parameters(self):
         # set num_key_value_heads only for attention layers
         self.hparams["num_key_value_heads"] = [
-            self.hparams["num_key_value_heads"] if layer_type == "full_attention" else 0
+            self.hparams["num_key_value_heads"] if layer_type != "conv" else 0
             for layer_type in self.hparams["layer_types"]
         ]
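This hunk widens the per-layer KV-head rule: every layer type other than "conv" now keeps its key/value heads, rather than only "full_attention" layers. A small standalone illustration (the layer-type list here is invented for the example):

# Illustrative only: how the per-layer num_key_value_heads list differs
# between the old '== "full_attention"' test and the new '!= "conv"' test.
# "sliding_attention" is a hypothetical extra layer type for the example.
n_kv_heads = 8
layer_types = ["conv", "full_attention", "sliding_attention"]

old = [n_kv_heads if t == "full_attention" else 0 for t in layer_types]
new = [n_kv_heads if t != "conv" else 0 for t in layer_types]
print(old)  # [0, 8, 0]
print(new)  # [0, 8, 8]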

@@ -10345,6 +10345,25 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
         yield f"{self.dense_tensor_name}.weight", tensor.clone()
 
 
+@ModelBase.register("Lfm25AudioTokenizer")
+class LFM25AudioTokenizer(LFM2Model):
+    model_arch = gguf.MODEL_ARCH.LFM2
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
+        self.gguf_writer.add_embedding_length_out(self.hparams.get("output_size"))
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "istft.window" or name.startswith("emb.emb"):
+            return []
+
+        if name.startswith("lin"):
+            name = name.replace("lin", "dense_2_out")
+
+        return super().modify_tensors(data_torch, name, bid)
+
+
 @ModelBase.register("Lfm2MoeForCausalLM")
 class LFM2MoeModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LFM2MOE
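The modify_tensors override above drops the precomputed ISTFT window and embedding tables and renames the "lin" projection to "dense_2_out". A standalone restatement of that mapping (the sample tensor names below are invented for illustration):

# Sketch of the tensor-name handling in LFM25AudioTokenizer;
# returns None for tensors that are skipped at conversion time.
def map_tensor_name(name: str) -> str | None:
    if name == "istft.window" or name.startswith("emb.emb"):
        return None
    if name.startswith("lin"):
        return name.replace("lin", "dense_2_out")
    return name

for n in ["istft.window", "emb.emb.weight", "lin.weight", "attn.q.weight"]:
    print(n, "->", map_tensor_name(n))
# istft.window -> None
# emb.emb.weight -> None
# lin.weight -> dense_2_out.weight
# attn.q.weight -> attn.q.weight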