From c69a95b4bf453f4fb83ad0a8e07a97dfcd827f70 Mon Sep 17 00:00:00 2001
From: Marcus Edel <marcus.edel@collabora.com>
Date: Mon, 4 May 2026 14:43:47 +0000
Subject: [PATCH 1/2] Port LlamacppUtils.hpp helpers to common_init_result_ptr
 API.

Signed-off-by: Marcus Edel <marcus.edel@collabora.com>
---
 .../LlamacppUtils.hpp                         | 27 +++++++++++--------
 1 file changed, 16 insertions(+), 11 deletions(-)
diff --git a/packages/qvac-lib-inference-addon-cpp/src/qvac-lib-inference-addon-cpp/LlamacppUtils.hpp b/packages/qvac-lib-inference-addon-cpp/src/qvac-lib-inference-addon-cpp/LlamacppUtils.hpp
index c0b912f64b..32a723c393 100644
--- a/packages/qvac-lib-inference-addon-cpp/src/qvac-lib-inference-addon-cpp/LlamacppUtils.hpp
+++ b/packages/qvac-lib-inference-addon-cpp/src/qvac-lib-inference-addon-cpp/LlamacppUtils.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <filesystem>
 #include <iostream>
 #include <ranges>
 #include <streambuf>
@@ -11,14 +12,13 @@
 #include "common/common.h"
 
 /// @note async version
-inline common_init_result initFromShards(
+inline common_init_result_ptr initFromShards(
     const GGUFShards& shards, common_params& params,
     const std::string& loadingContext) {
   LOG_INF(
       "%s: load the model from async shards and apply lora adapter, if any.\n",
       __func__);
   llama_model_params mparams = common_model_params_to_llama(params);
-  common_init_result iparams;
   auto pathsView =
       shards.gguf_files |
       std::views::transform([](const std::string& str) { return str.c_str(); });
@@ -29,31 +29,30 @@ inline common_init_result initFromShards(
       loadingContext.c_str(),
       shards.tensors_file.c_str(),
       mparams);
-  return common_init_from_model_and_params(model, std::move(iparams), params);
+  return common_init_from_model_and_params(model, params);
 }
 
 /// @note from disk
-inline common_init_result
+inline common_init_result_ptr
 initFromShards(const GGUFShards& shards, common_params& params) {
   LOG_INF(
       "%s: load the model from disk shards and apply lora adapter, if any.\n",
       __func__);
   llama_model_params mparams = common_model_params_to_llama(params);
-  common_init_result iparams;
   auto pathsView =
       shards.gguf_files |
       std::views::transform([](const std::string& str) { return str.c_str(); });
   std::vector<const char*> pathsVec(pathsView.begin(), pathsView.end());
   llama_model* model =
       llama_model_load_from_splits(pathsVec.data(), pathsVec.size(), mparams);
-  return common_init_from_model_and_params(model, std::move(iparams), params);
+  return common_init_from_model_and_params(model, params);
 }
 
 /// @brief Initializes a model from a single gguf stream stored in memory
 /// @note For performance reasons `initFromShards` should be preferably used
 /// with streams. However, this function is still offered to unify the Js
 /// interface of the addon and separate concerns.
-inline common_init_result initFromMemory(
+inline common_init_result_ptr initFromMemory(
     std::unique_ptr<std::basic_streambuf<char>>&& streambuf,
     common_params& params) {
   LOG_INF(
@@ -61,7 +60,6 @@ inline common_init_result initFromMemory(
       "any.\n",
       __func__);
   llama_model_params mparams = common_model_params_to_llama(params);
-  common_init_result iparams;
 
   // Transfer the (Js) blobs to a contiguous memory block
   // Potential for optimization here. However for performance reasons,
@@ -83,7 +81,7 @@ inline common_init_result initFromMemory(
 
   llama_model* model =
       llama_model_load_from_buffer(std::move(contiguousData), mparams);
-  return common_init_from_model_and_params(model, std::move(iparams), params);
+  return common_init_from_model_and_params(model, params);
 }
 
 /// @brief Initialize a model handling streaming, not-streaming, sharded or
@@ -97,13 +95,13 @@ inline common_init_result initFromMemory(
 /// @param isStreaming Should be set to true when `setWeightsForFile` is
 /// being used to populate `singleGgufStreamedFiles` or call
 /// `llama_model_load_fulfill_split_future`
-inline common_init_result initFromConfig(
+inline common_init_result_ptr initFromConfig(
     common_params& params, const std::string& modelPath,
     std::map<std::string, std::unique_ptr<std::basic_streambuf<char>>>&
         singleGgufStreamedFiles,
     const GGUFShards& shards, const std::string loading_context,
     const bool isStreaming, const char* AddonID, const std::string& error) {
-  common_init_result llamaInit;
+  common_init_result_ptr llamaInit;
   // Stream should have been awaited by the time activate is called from JS
   // and init is triggered. isStreaming should be (thread) safe to use at this
   // point because `setWeightsForFile` has already finished.
@@ -153,6 +151,13 @@ inline common_init_result initFromConfig(
       LOG_INF(
           "%s: load the model from disk file and apply lora adapter, if any.\n",
           __func__);
+      if (!std::filesystem::exists(modelPath)) {
+        throw qvac_errors::StatusError(
+            AddonID,
+            error,
+            string_format(
+                "%s: model file not found: %s\n", __func__, modelPath.c_str()));
+      }
       llamaInit = std::move(common_init_from_params(params));
     } else {
       LOG_INF(

From 188489e775bb1645bd23fc35d805a4a06ca1c6b1 Mon Sep 17 00:00:00 2001
From: gianni-cor <gianfranco.cordella@tether.io>
Date: Mon, 4 May 2026 16:47:04 +0200
Subject: [PATCH 2/2] Update vcpkg.json

---
 packages/qvac-lib-inference-addon-cpp/vcpkg.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/qvac-lib-inference-addon-cpp/vcpkg.json b/packages/qvac-lib-inference-addon-cpp/vcpkg.json
index 71c690b6fb..cc467d7aa1 100644
--- a/packages/qvac-lib-inference-addon-cpp/vcpkg.json
+++ b/packages/qvac-lib-inference-addon-cpp/vcpkg.json
@@ -1,6 +1,6 @@
 {
   "name": "qvac-lib-inference-addon-cpp",
-  "version": "1.1.6",
+  "version": "1.1.7",
   "dependencies": [
     {
       "name": "qvac-lint-cpp",