2 changes: 1 addition & 1 deletion common/common.cpp
@@ -1067,7 +1067,7 @@ common_init_result::common_init_result(common_params & params) :

     const llama_vocab * vocab = llama_model_get_vocab(model);

-    // load and optionally apply lora adapters (must be loaded before context creation)
+    // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_adapter_lora_ptr lora;
         lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
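The dropped "(must be loaded before context creation)" note is the user-visible effect of this change: adapters no longer have to exist before the context does. A minimal sketch of what that permits, assuming the public C API entry points below (llama_model_load_from_file, llama_init_from_model, llama_set_adapter_lora) and placeholder file paths:

    #include "llama.h"

    int main(void) {
        llama_model   * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
        llama_context * ctx   = llama_init_from_model(model, llama_context_default_params());

        // the adapter can now be loaded after the context already exists
        llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf");
        llama_set_adapter_lora(ctx, adapter, 1.0f);

        // ... run inference ...

        llama_free(ctx);
        llama_model_free(model);   // adapters not freed explicitly go with the model
        return 0;
    }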
4 changes: 1 addition & 3 deletions include/llama-cpp.h
@@ -21,9 +21,7 @@ struct llama_sampler_deleter {
 };

 struct llama_adapter_lora_deleter {
-    void operator()(llama_adapter_lora *) {
-        // llama_adapter_lora_free is deprecated
-    }
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };

 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
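With the deleter wired back to llama_adapter_lora_free, llama_adapter_lora_ptr behaves like the other smart-pointer aliases in this header again: the adapter is released as soon as the pointer goes out of scope instead of lingering until the model is destroyed. A short usage sketch (the adapter path is a placeholder):

    #include "llama-cpp.h"

    void try_adapter(llama_model * model) {
        llama_adapter_lora_ptr lora(llama_adapter_lora_init(model, "adapter.gguf"));
        if (!lora) {
            return;   // loading failed; nothing to clean up
        }
        // ... attach the adapter to a context and run with it ...
    }   // lora goes out of scope here and llama_adapter_lora_deleter frees the adapter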
6 changes: 2 additions & 4 deletions include/llama.h
@@ -623,7 +623,6 @@ extern "C" {

     // Load a LoRA adapter from file
     // The adapter is valid as long as the associated model is not freed
-    // All adapters must be loaded before context creation
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
             const char * path_lora);
@@ -647,9 +646,8 @@ extern "C" {
     LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);

     // Manually free a LoRA adapter
-    // NOTE: loaded adapters will be free when the associated model is deleted
-    LLAMA_API DEPRECATED(void llama_adapter_lora_free(struct llama_adapter_lora * adapter),
-        "adapters are now freed together with the associated model");
+    // NOTE: loaded adapters that are not manually freed will be freed when the associated model is deleted
+    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);

     // Get the invocation tokens if the current lora is an alora
     LLAMA_API uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter);
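The signature above is un-deprecated rather than new, so callers regain explicit control over adapter lifetime while the fallback (freeing leftovers together with the model) still applies. A minimal sketch, assuming ctx and model already exist and the adapter path is a placeholder:

    // load, use, then release the adapter before the model is freed
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf");
    if (adapter != nullptr) {
        llama_set_adapter_lora(ctx, adapter, 0.8f);
        // ... run inference with the adapter applied ...
        llama_rm_adapter_lora(ctx, adapter);   // detach from the context first
        llama_adapter_lora_free(adapter);      // then free; nullptr is also accepted
    }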
15 changes: 12 additions & 3 deletions src/llama-adapter.cpp
@@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
 }

 llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
-    llama_adapter_lora * adapter = new llama_adapter_lora();
+    llama_adapter_lora * adapter = new llama_adapter_lora(model);

     try {
         llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }

-void llama_adapter_lora_free(llama_adapter_lora *) {
-    // deprecated: adapters are freed by llama_model's destructor
+void llama_adapter_lora_free(llama_adapter_lora * adapter) {
+    if (adapter == nullptr) {
+        return;
+    }
+
+    if (adapter->model != nullptr) {
+        adapter->model->loras.erase(adapter);
+        adapter->model = nullptr;
+    }
+
+    delete adapter;
 }

 uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {
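The erase call above only works if the model keeps a registry of the adapters that are still alive (the loras member the adapter removes itself from), so that the model's destructor can reclaim whatever was never freed without double-freeing what was. A simplified, self-contained sketch of that ownership pattern — not the actual llama.cpp code, and the registration point is an assumption:

    #include <unordered_set>

    struct adapter;

    struct model {
        std::unordered_set<adapter *> loras;   // adapters still owned by the model
        ~model();
    };

    struct adapter {
        model * owner = nullptr;
        explicit adapter(model * m) : owner(m) { m->loras.insert(this); }
    };

    model::~model() {
        for (adapter * a : loras) {
            delete a;                          // reclaim anything never freed explicitly
        }
    }

    void adapter_free(adapter * a) {
        if (a == nullptr) {
            return;
        }
        if (a->owner != nullptr) {
            a->owner->loras.erase(a);          // detach so ~model() cannot double-free
            a->owner = nullptr;
        }
        delete a;
    }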
4 changes: 3 additions & 1 deletion src/llama-adapter.h
@@ -61,6 +61,8 @@ struct llama_adapter_lora_weight {
 };

 struct llama_adapter_lora {
+    llama_model * model = nullptr;
+
     // map tensor name to lora_a_b
     std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;

@@ -75,7 +77,7 @@ struct llama_adapter_lora {
     // activated lora (aLoRA)
     std::vector<llama_token> alora_invocation_tokens;

-    llama_adapter_lora() = default;
+    explicit llama_adapter_lora(llama_model * model) : model(model) {}
     ~llama_adapter_lora() = default;

     llama_adapter_lora_weight * get_weight(ggml_tensor * w);