diff --git a/include/llama.h b/include/llama.h
index 4f0124fdc8..8b3c8a7b10 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -607,6 +607,8 @@ extern "C" {
     //
 
     // Load a LoRA adapter from file
+    // The adapter is valid as long as the associated model is not freed
+    // All adapters must be loaded before context creation
     LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
             const char * path_lora);
diff --git a/src/llama-adapter.cpp b/src/llama-adapter.cpp
index d8eef75a7a..b77e3a7032 100644
--- a/src/llama-adapter.cpp
+++ b/src/llama-adapter.cpp
@@ -146,9 +146,11 @@ llama_adapter_lora_weight * llama_adapter_lora::get_weight(ggml_tensor * w) {
     return nullptr;
 }
 
-static void llama_adapter_lora_init_impl(llama_model & model, const char * path_lora, llama_adapter_lora & adapter) {
+static void llama_adapter_lora_init_impl(const char * path_lora, llama_adapter_lora & adapter) {
     LLAMA_LOG_INFO("%s: loading lora adapter from '%s' ...\n", __func__, path_lora);
 
+    llama_model & model = adapter.model;
+
     ggml_context * ctx_init;
     gguf_init_params meta_gguf_params = {
         /* .no_alloc = */ true,
@@ -411,14 +413,17 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
         }
     }
 
+    // update number of nodes used
+    adapter.model.n_lora_nodes += adapter.get_n_nodes();
+
     LLAMA_LOG_INFO("%s: loaded %zu tensors from lora file\n", __func__, adapter.ab_map.size()*2);
 }
 
 llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
-    llama_adapter_lora * adapter = new llama_adapter_lora();
+    llama_adapter_lora * adapter = new llama_adapter_lora(*model);
 
     try {
-        llama_adapter_lora_init_impl(*model, path_lora, *adapter);
+        llama_adapter_lora_init_impl(path_lora, *adapter);
         return adapter;
     } catch (const std::exception & err) {
         LLAMA_LOG_ERROR("%s: failed to apply lora adapter: %s\n", __func__, err.what());
@@ -469,6 +474,10 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
 }
 
 void llama_adapter_lora_free(llama_adapter_lora * adapter) {
+    // update number of nodes used
+    GGML_ASSERT(adapter->model.n_lora_nodes >= adapter->get_n_nodes());
+    adapter->model.n_lora_nodes -= adapter->get_n_nodes();
+
     delete adapter;
 }
 
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
index 4f65247c0f..d0dad8a789 100644
--- a/src/llama-adapter.h
+++ b/src/llama-adapter.h
@@ -59,6 +59,8 @@ struct llama_adapter_lora_weight {
 };
 
 struct llama_adapter_lora {
+    llama_model & model;
+
     // map tensor name to lora_a_b
     std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
 
@@ -73,10 +75,14 @@ struct llama_adapter_lora {
     // activated lora (aLoRA)
     std::vector<llama_token> alora_invocation_tokens;
 
-    llama_adapter_lora() = default;
+    llama_adapter_lora(llama_model & model) : model(model) {}
     ~llama_adapter_lora() = default;
 
     llama_adapter_lora_weight * get_weight(ggml_tensor * w);
+
+    uint32_t get_n_nodes() const {
+        return ab_map.size() * 4u; // scale, add, 2 x mul_mat
+    }
 };
 
 using llama_adapter_loras = std::unordered_map<llama_adapter_lora *, float>;
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
index 1c530fdc91..34dfcd4724 100644
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1442,7 +1442,9 @@ uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
     if (model.arch == LLM_ARCH_QWEN3NEXT) {
         return std::max(n_tokens * 40, 32u * model.n_tensors());
     }
-    return std::max(1024u, 8u*model.n_tensors());
+    uint32_t res = std::max(1024u, 8u*model.n_tensors());
+    res += model.n_lora_nodes;
+    return res;
 }
 
 llm_graph_result * llama_context::get_gf_res_reserve() const {
diff --git a/src/llama-model.h b/src/llama-model.h
index dbe5edc153..f4f44a92b6 100644
--- a/src/llama-model.h
+++ b/src/llama-model.h
@@ -475,6 +475,9 @@ struct llama_model {
     // for quantize-stats only
     std::vector<std::pair<std::string, ggml_tensor *>> tensors_by_name;
 
+    // for keeping track of extra nodes used by lora adapters
+    uint32_t n_lora_nodes = 0;
+
     int64_t t_load_us = 0;
     int64_t t_start_us = 0;
 
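
For reference, a minimal usage sketch of the ordering contract documented in llama.h: every adapter is loaded (and counted into model.n_lora_nodes) before the context is created, so graph_max_nodes() can reserve the extra graph nodes up front. This is not part of the patch; the file paths and the 1.0f scale are placeholders, and the surrounding calls (llama_backend_init, llama_model_load_from_file, llama_init_from_model, llama_set_adapter_lora) are the existing public API.

// sketch only: "model.gguf" / "adapter.gguf" are placeholder paths
#include "llama.h"

int main() {
    llama_backend_init();

    llama_model * model = llama_model_load_from_file("model.gguf", llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }

    // loading the adapter bumps model.n_lora_nodes by ab_map.size() * 4
    // (scale, add, 2 x mul_mat per A/B pair)
    llama_adapter_lora * adapter = llama_adapter_lora_init(model, "adapter.gguf");
    if (adapter == nullptr) {
        llama_model_free(model);
        return 1;
    }

    // context creation sizes the compute graph via graph_max_nodes(),
    // which now includes model.n_lora_nodes
    llama_context * ctx = llama_init_from_model(model, llama_context_default_params());

    llama_set_adapter_lora(ctx, adapter, 1.0f); // activate the adapter with scale 1.0

    // ... decode as usual ...

    llama_free(ctx);
    llama_adapter_lora_free(adapter); // asserts and subtracts the adapter's node count
    llama_model_free(model);
    llama_backend_free();
    return 0;
}

Freeing an adapter decrements the counter (with the GGML_ASSERT guarding against underflow), so only contexts created while the adapter was loaded carry its node reservation.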