From c90e0ae2d230761dde794560d399712440afc5de Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Wed, 11 Feb 2026 05:26:49 +0300
Subject: [PATCH 01/11] (experiment) llm tools: position before user prompt and
 after

---
 .../examples/toolCalling.js                   | 79 ++++++++++++++++---
 1 file changed, 66 insertions(+), 13 deletions(-)

diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
index 084c8d326d..c4afe814b4 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
@@ -125,8 +125,7 @@ async function main () {
       content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
     }
 
-    const toolQuery1 = [
-      systemMessageAmbiguous,
+    const tools1 = [
       // Test handled by this function:
       // - Multiple parameters with different types
       // - Complex multiple tools with array parameters
@@ -194,14 +193,27 @@ async function main () {
           required: ['table', 'conditions']
         }
       },
+
+    ]
+
+    const toolsFirstQuery1 = [
+      systemMessageAmbiguous,
+      ...tools1,
       {
         role: 'user',
         content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
-      }
+      },
     ]
-
-    const toolQuery2 = [
+    const toolsLastQuery1 = [
       systemMessageAmbiguous,
+      {
+        role: 'user',
+        content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
+      },
+      ...tools1,
+    ]
+
+    const tools2 = [
       // Test handled by this function:
       // - Math/computation tool
       {
@@ -234,17 +246,27 @@ async function main () {
           required: ['lat1', 'lon1', 'lat2', 'lon2']
         }
       },
+    ]
+
+    const toolsFirstQuery2 = [
+      systemMessageAmbiguous,
+      ...tools2,
       {
         role: 'user',
         content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
-      }
+      },
     ]
 
-    const toolQuery3 = [
+    const toolsLastQuery2 = [
+      systemMessageAmbiguous,
       {
-        role: 'system',
-        content: 'You are a personal assistant.'
+        role: 'user',
+        content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
       },
+      ...tools2,
+    ]
+
+    const tools3 = [
       // Test handled by this function:
       // - Part of conversation context tool test
       {
@@ -277,6 +299,14 @@ async function main () {
           required: ['title', 'date']
         }
       },
+    ]
+
+    const toolsFirstQuery3 = [
+      {
+        role: 'system',
+        content: 'You are a personal assistant.'
+      },
+      ...tools3,
       {
         role: 'user',
         content: 'What is the weather in Seattle on April 10th?'
@@ -288,14 +318,37 @@ async function main () {
       {
         role: 'user',
         content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
-      }
+      },
+    ]
+
+    const toolsLastQuery3 = [
+      {
+        role: 'system',
+        content: 'You are a personal assistant.'
+      },
+      {
+        role: 'user',
+        content: 'What is the weather in Seattle on April 10th?'
+      },
+      {
+        role: 'assistant',
+        content: 'Let me check that for you. Do you need hourly or just daily?'
+      },
+      {
+        role: 'user',
+        content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
+      },
+      ...tools3,
     ]
 
     // 5. Running tool calling queries
     const queries = [
-      { name: 'Query 1: Complex tool calling with multiple parameters', prompt: toolQuery1 },
-      { name: 'Query 2: Math calculation and ambiguous query', prompt: toolQuery2 },
-      { name: 'Query 3: Conversation context with tools', prompt: toolQuery3 }
+      { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 },
+      { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
+      { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
+      { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
+      { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
+      { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 },
     ]
 
     const toolCallResults = []

From 31bd802beea80c853a222d0ee4378ccf4248d98c Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Wed, 11 Feb 2026 05:35:55 +0300
Subject: [PATCH 02/11] (chore) linter auto-fix

---
 .../examples/toolCalling.js                   | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
index c4afe814b4..b124fcea23 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
@@ -192,7 +192,7 @@ async function main () {
           },
           required: ['table', 'conditions']
         }
-      },
+      }
 
     ]
 
@@ -202,7 +202,7 @@ async function main () {
       {
         role: 'user',
         content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
-      },
+      }
     ]
     const toolsLastQuery1 = [
       systemMessageAmbiguous,
@@ -210,7 +210,7 @@ async function main () {
         role: 'user',
         content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
       },
-      ...tools1,
+      ...tools1
     ]
 
     const tools2 = [
@@ -245,7 +245,7 @@ async function main () {
           },
           required: ['lat1', 'lon1', 'lat2', 'lon2']
         }
-      },
+      }
     ]
 
     const toolsFirstQuery2 = [
@@ -254,7 +254,7 @@ async function main () {
       {
         role: 'user',
         content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
-      },
+      }
     ]
 
     const toolsLastQuery2 = [
@@ -263,7 +263,7 @@ async function main () {
         role: 'user',
         content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
       },
-      ...tools2,
+      ...tools2
     ]
 
     const tools3 = [
@@ -298,7 +298,7 @@ async function main () {
           },
           required: ['title', 'date']
         }
-      },
+      }
     ]
 
     const toolsFirstQuery3 = [
@@ -318,7 +318,7 @@ async function main () {
       {
         role: 'user',
         content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
-      },
+      }
     ]
 
     const toolsLastQuery3 = [
@@ -338,7 +338,7 @@ async function main () {
         role: 'user',
         content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
       },
-      ...tools3,
+      ...tools3
     ]
 
     // 5. Running tool calling queries
@@ -348,7 +348,7 @@ async function main () {
       { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
       { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
       { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
-      { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 },
+      { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
     ]
 
     const toolCallResults = []

From 4aded94c966cff7d55e23dcc22bf66bc492f1e27 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Wed, 11 Feb 2026 13:18:35 +0300
Subject: [PATCH 03/11] (experiment) tool call: LFM model

---
 .../examples/toolCalling.js                   | 62 +++++++++++++++----
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
index b124fcea23..fe9b4e8c8e 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
@@ -10,7 +10,7 @@ function createSeparator (char = '=', length = 80) {
   return char.repeat(length)
 }
 
-function extractToolCalls (response) {
+function extractToolCallsQwen (response) {
   const toolCalls = []
   const toolCallRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g
   let match
@@ -26,7 +26,24 @@ function extractToolCalls (response) {
   return toolCalls
 }
 
-async function runQuery (model, query) {
+const extractToolCallsLFM = (response) => {
+  const toolCallParts = response.split(']')
+  if (toolCallParts.length < 2) {
+    return []
+  }
+  const toolCallsStr = toolCallParts.slice(0, -1).join(']').concat(']')
+  try {
+    const toolCalls = JSON.parse(toolCallsStr)
+    return toolCalls
+  } catch (e) {
+    console.error('ERROR: extractToolCallsLFM: unable to extract toolCalls\n')
+    console.error(toolCallsStr)
+    console.error(e)
+  }
+  return []
+}
+
+async function runQuery (model, query, extractToolCalls) {
   console.log(`\n${createSeparator()}`)
   console.log(query.name)
   console.log(createSeparator())
@@ -70,6 +87,19 @@ function printToolCallSummary (results) {
   console.log(`\n${createSeparator()}`)
 }
 
+const modelMap = {
+  'LFM': {
+    hdKey: 'f41503e44a2c0a537d9a9665984cb2d87eb2216e6301e898ffea60f5ce6c904d',
+    modelName: 'LFM2.5-1.2B-Instruct-Q4_K_M.gguf',
+    extractToolCalls: extractToolCallsLFM,
+  },
+  'Qwen3': {
+    hdKey: '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9',
+    modelName: 'Qwen3-1.7B-Q4_0.gguf',
+    extractToolCalls: extractToolCallsQwen,
+  }
+}
+
 async function main () {
   console.log('Tool Calling Example: Demonstrates tool calling capabilities')
   console.log('============================================================')
@@ -78,7 +108,10 @@ async function main () {
   const store = new Corestore('./store')
   const hdStore = store.namespace('hd')
 
-  const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9'
+  // CHANGE ME
+  const { hdKey, modelName, extractToolCalls } = modelMap['LFM']
+  // const { hdKey, modelName, extractToolCalls } = modelMap['Qwen3']
+
   const hdDL = new HyperDriveDL({
     key: `hd://${hdKey}`,
     store: hdStore
@@ -89,7 +122,7 @@ async function main () {
     loader: hdDL,
     opts: { stats: true },
     logger: console,
-    modelName: 'Qwen3-1.7B-Q4_0.gguf',
+    modelName,
     diskPath: './models'
   }
 
@@ -122,7 +155,8 @@ async function main () {
     // 4. Defining tool queries with function schemas
     const systemMessageAmbiguous = {
       role: 'system',
-      content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
+      content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls. Output function calls as JSON.'
+      // content: 'Output function calls as JSON. You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
     }
 
     const tools1 = [
@@ -197,8 +231,10 @@ async function main () {
     ]
 
     const toolsFirstQuery1 = [
-      systemMessageAmbiguous,
-      ...tools1,
+      {
+        ...systemMessageAmbiguous,
+        content: systemMessageAmbiguous.content.concat(`List of tools: ${JSON.stringify(tools1)}`)
+      },
       {
         role: 'user',
         content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
@@ -344,16 +380,16 @@ async function main () {
     // 5. Running tool calling queries
     const queries = [
       { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 },
-      { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
-      { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
-      { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
-      { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
-      { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
+      // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
+      // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
+      // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
+      // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
+      // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
     ]
 
     const toolCallResults = []
     for (const query of queries) {
-      const result = await runQuery(model, query)
+      const result = await runQuery(model, query, extractToolCalls)
       toolCallResults.push(result)
     }
 

From 006a1393ecf37018e2be42bac1c2f1a9b56015d6 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Wed, 11 Feb 2026 15:02:24 +0300
Subject: [PATCH 04/11] Revert "(experiment) tool call: LFM model"

This reverts commit 4aded94c966cff7d55e23dcc22bf66bc492f1e27.
---
 .../examples/toolCalling.js                   | 62 ++++---------------
 1 file changed, 13 insertions(+), 49 deletions(-)

diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
index fe9b4e8c8e..b124fcea23 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
@@ -10,7 +10,7 @@ function createSeparator (char = '=', length = 80) {
   return char.repeat(length)
 }
 
-function extractToolCallsQwen (response) {
+function extractToolCalls (response) {
   const toolCalls = []
   const toolCallRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g
   let match
@@ -26,24 +26,7 @@ function extractToolCallsQwen (response) {
   return toolCalls
 }
 
-const extractToolCallsLFM = (response) => {
-  const toolCallParts = response.split(']')
-  if (toolCallParts.length < 2) {
-    return []
-  }
-  const toolCallsStr = toolCallParts.slice(0, -1).join(']').concat(']')
-  try {
-    const toolCalls = JSON.parse(toolCallsStr)
-    return toolCalls
-  } catch (e) {
-    console.error('ERROR: extractToolCallsLFM: unable to extract toolCalls\n')
-    console.error(toolCallsStr)
-    console.error(e)
-  }
-  return []
-}
-
-async function runQuery (model, query, extractToolCalls) {
+async function runQuery (model, query) {
   console.log(`\n${createSeparator()}`)
   console.log(query.name)
   console.log(createSeparator())
@@ -87,19 +70,6 @@ function printToolCallSummary (results) {
   console.log(`\n${createSeparator()}`)
 }
 
-const modelMap = {
-  'LFM': {
-    hdKey: 'f41503e44a2c0a537d9a9665984cb2d87eb2216e6301e898ffea60f5ce6c904d',
-    modelName: 'LFM2.5-1.2B-Instruct-Q4_K_M.gguf',
-    extractToolCalls: extractToolCallsLFM,
-  },
-  'Qwen3': {
-    hdKey: '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9',
-    modelName: 'Qwen3-1.7B-Q4_0.gguf',
-    extractToolCalls: extractToolCallsQwen,
-  }
-}
-
 async function main () {
   console.log('Tool Calling Example: Demonstrates tool calling capabilities')
   console.log('============================================================')
@@ -108,10 +78,7 @@ async function main () {
   const store = new Corestore('./store')
   const hdStore = store.namespace('hd')
 
-  // CHANGE ME
-  const { hdKey, modelName, extractToolCalls } = modelMap['LFM']
-  // const { hdKey, modelName, extractToolCalls } = modelMap['Qwen3']
-
+  const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9'
   const hdDL = new HyperDriveDL({
     key: `hd://${hdKey}`,
     store: hdStore
@@ -122,7 +89,7 @@ async function main () {
     loader: hdDL,
     opts: { stats: true },
     logger: console,
-    modelName,
+    modelName: 'Qwen3-1.7B-Q4_0.gguf',
     diskPath: './models'
   }
 
@@ -155,8 +122,7 @@ async function main () {
     // 4. Defining tool queries with function schemas
     const systemMessageAmbiguous = {
       role: 'system',
-      content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls. Output function calls as JSON.'
-      // content: 'Output function calls as JSON. You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
+      content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
     }
 
     const tools1 = [
@@ -231,10 +197,8 @@ async function main () {
     ]
 
     const toolsFirstQuery1 = [
-      {
-        ...systemMessageAmbiguous,
-        content: systemMessageAmbiguous.content.concat(`List of tools: ${JSON.stringify(tools1)}`)
-      },
+      systemMessageAmbiguous,
+      ...tools1,
       {
         role: 'user',
         content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.'
@@ -380,16 +344,16 @@ async function main () {
     // 5. Running tool calling queries
     const queries = [
       { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 },
-      // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
-      // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
-      // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
-      // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
-      // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
+      { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
+      { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
+      { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
+      { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
+      { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
     ]
 
     const toolCallResults = []
     for (const query of queries) {
-      const result = await runQuery(model, query, extractToolCalls)
+      const result = await runQuery(model, query)
       toolCallResults.push(result)
     }
 

From 53a87745bace0e54c071e28c1ab289ce7f2f9b5a Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Thu, 12 Feb 2026 12:20:31 +0300
Subject: [PATCH 05/11] (draft) logger experiments

---
 .../examples/toolCalling.js                   | 20 +++++++++++++++++++
 .../qvac-lib-infer-llamacpp-llm/package.json  |  1 +
 .../qvac-lib-infer-llamacpp-llm/sample.json   | 14 +++++++++++++
 3 files changed, 35 insertions(+)
 create mode 100644 packages/qvac-lib-infer-llamacpp-llm/sample.json

diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
index b124fcea23..cd80da3e36 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js
@@ -3,6 +3,7 @@
 const Corestore = require('corestore')
 const HyperDriveDL = require('@qvac/dl-hyperdrive')
 const LlmLlamacpp = require('../index')
+const { setLogger, releaseLogger } = require('../addonLogging')
 const process = require('bare-process')
 
 // Helper functions
@@ -74,6 +75,24 @@ async function main () {
   console.log('Tool Calling Example: Demonstrates tool calling capabilities')
   console.log('============================================================')
 
+  // IMPORTANT: Set up the logger FIRST, before creating any addon instances
+  console.log('Setting up C++ logger...')
+
+  setLogger((priority, message) => {
+    const priorityNames = {
+      0: 'ERROR',
+      1: 'WARNING',
+      2: 'INFO',
+      3: 'DEBUG',
+      4: 'OFF'
+    }
+
+    const priorityName = priorityNames[priority] || 'UNKNOWN'
+    const timestamp = new Date().toISOString()
+
+    console.log(`[${timestamp}] [C++ log] [${priorityName}]: ${message}`)
+  })
+
   // 1. Initializing data loader
   const store = new Corestore('./store')
   const hdStore = store.namespace('hd')
@@ -368,6 +387,7 @@ async function main () {
     await store.close()
     await hdDL.close()
     await model.unload()
+    releaseLogger()
   }
 }
 
diff --git a/packages/qvac-lib-infer-llamacpp-llm/package.json b/packages/qvac-lib-infer-llamacpp-llm/package.json
index 5a2e947735..1858e6c3dd 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/package.json
+++ b/packages/qvac-lib-infer-llamacpp-llm/package.json
@@ -70,6 +70,7 @@
   },
   "dependencies": {
     "@qvac/infer-base": "^0.2.2",
+    "@qvac/llm-llamacpp": "^0.8.9",
     "bare-path": "^3.0.0",
     "bare-process": "^4.2.2"
   },
diff --git a/packages/qvac-lib-infer-llamacpp-llm/sample.json b/packages/qvac-lib-infer-llamacpp-llm/sample.json
new file mode 100644
index 0000000000..cbfdd20c8a
--- /dev/null
+++ b/packages/qvac-lib-infer-llamacpp-llm/sample.json
@@ -0,0 +1,14 @@
+[
+  {
+    "name": "searchProducts",
+    "arguments": {"query": "laptops under $1000", "category": "electronics", "maxPrice": 1000}
+  },
+  {
+    "name": "addToCart",
+    "arguments": {"items": [{"productId": "laptop-123", "quantity": 2}]}
+  },
+  {
+    "name": "queryDB",
+    "arguments": {"table": "users", "conditions": {"field": "age", "operator": "greaterThan", "value": 25}, "limit": 50, "includeMetadata": true}
+  }
+]

From 0f1ba887e1174f6cad841e47d754e6ba3f01036c Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Fri, 27 Feb 2026 01:19:12 +0300
Subject: [PATCH 06/11] (internal) dynamic tools: test cases with kvCache

---
 .../examples/llamacpp-dynamic-tools.ts        | 219 ++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts

diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
new file mode 100644
index 0000000000..98d1e4b4d1
--- /dev/null
+++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
@@ -0,0 +1,219 @@
+import { z } from "zod";
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolCall,
+  type CompletionStats,
+  QWEN_3_1_7B_INST_Q4,
+} from "@/index";
+
+// Define Zod schemas for tool parameters
+const weatherSchema = z.object({
+  city: z.string().describe("City name"),
+  country: z.string().describe("Country code").optional(),
+});
+
+const horoscopeSchema = z.object({
+  sign: z.string().describe("An astrological sign like Taurus or Aquarius"),
+});
+
+// Map tool names to their schemas for runtime validation
+const toolSchemas = {
+  get_weather: weatherSchema,
+  get_horoscope: horoscopeSchema,
+};
+
+// Simple tool definitions - just name, description, and Zod schema!
+const tools1 = [
+  {
+    name: "get_weather",
+    description: "Get current weather for a city",
+    parameters: weatherSchema,
+  },
+];
+
+const tools2 = [
+  {
+    name: "get_horoscope",
+    description: "Get today's horoscope for an astrological sign",
+    parameters: horoscopeSchema,
+  },
+];
+
+const toolsAll = [
+  {
+    name: "get_weather",
+    description: "Get current weather for a city",
+    parameters: weatherSchema,
+  },
+  {
+    name: "get_horoscope",
+    description: "Get today's horoscope for an astrological sign",
+    parameters: horoscopeSchema,
+  },
+];
+
+async function chatSession ({ modelId, history, tools, kvCache }) {
+  // One chat turn: stream a completion, simulate its tool calls, push results onto `history`, stream a follow-up.
+  const result = completion({ modelId, history, kvCache, stream: true, tools });
+
+  // Consume token stream
+  const tokensTask = (async () => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  })();
+
+  // Consume tool call events
+  const toolsTask = (async () => {
+    for await (const evt of result.toolCallStream) {
+      if (evt.type === "toolCall") {
+        console.log(
+          `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+        );
+        console.log(`   ID: ${evt.call.id}`);
+      } else if (evt.type === "toolCallError") {
+        console.warn(`\n⚠️  Tool Error: ${evt.error.message}`);
+        console.warn(`   Code: ${evt.error.code}`);
+      }
+    }
+  })();
+
+  await Promise.all([tokensTask, toolsTask]);
+
+  const stats: CompletionStats | undefined = await result.stats;
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  console.log("\n\n📋 Parsed Tool Calls:");
+  if (toolCalls.length > 0) {
+    for (const call of toolCalls) {
+      console.log(`  - ${call.name}(${JSON.stringify(call.arguments)})`);
+
+      const schema = toolSchemas[call.name as keyof typeof toolSchemas];
+      if (schema) {
+        const validated = schema.safeParse(call.arguments);
+        if (validated.success) {
+          console.log(`    ✓ Arguments validated with Zod`);
+        } else {
+          console.log(`    ✗ Validation failed:`, validated.error);
+        }
+      }
+    }
+  } else {
+    console.log("  No tool calls detected in response");
+  }
+
+  console.log("\n📊 Performance Stats:", stats);
+
+  // Execute tool calls and send results back to the model
+  if (toolCalls.length > 0) {
+    console.log("\n\n🔧 Simulating Tool Execution...");
+
+    // Simulate tool execution (in a real app, you'd call actual APIs)
+    const toolResults = toolCalls.map((call) => {
+      let result = "";
+      if (call.name === "get_weather") {
+        const args = call.arguments as { city: string; country?: string };
+        result = `The weather in ${args.city} is sunny, 22°C with light clouds.`;
+      } else if (call.name === "get_horoscope") {
+        const args = call.arguments as { sign: string };
+        result = `Horoscope for ${args.sign}: Today is a great day for new beginnings and creative endeavors!`;
+      }
+      console.log(`  ✓ ${call.name}: ${result}`);
+      return { toolCallId: call.id, result };
+    });
+
+    // Add the assistant's tool-calling turn to conversation history
+    history.push({
+      role: "assistant",
+      content: await result.text,
+    });
+
+    // Add tool results as tool messages
+    for (const toolResult of toolResults) {
+      history.push({
+        role: "tool",
+        content: toolResult.result,
+      });
+    }
+  }
+
+  // Send follow-up question with tool results
+  console.log("\n\n🤖 Follow-up Response with Tool Results:");
+  const followUpResult = completion({
+    modelId,
+    history,
+    stream: true,
+    kvCache,
+    tools,
+  });
+
+  // Consume the token stream before awaiting `text`, as the first turn does.
+  for await (const token of followUpResult.tokenStream) {
+    process.stdout.write(token);
+  }
+  history.push({
+    role: "assistant",
+    content: await followUpResult.text,
+  });
+
+  const followUpStats = await followUpResult.stats;
+  console.log("\n\n📊 Follow-up Stats:", followUpStats);
+}
+
+async function runTest({ kvCache, toolVariants }) {
+  console.log('run cache id=', kvCache)
+  try {
+    // Load model from provided file path with tools support enabled
+    const modelId = await loadModel({
+      modelSrc: QWEN_3_1_7B_INST_Q4,
+      modelType: "llm",
+      modelConfig: {
+        ctx_size: 4096,
+        tools: true, // Enable tools support
+      },
+      onProgress: (progress) =>
+        console.log(`Loading: ${progress.percentage.toFixed(1)}%`),
+    });
+    console.log(`✅ Model loaded successfully! Model ID: ${modelId}`);
+
+    // Create conversation history
+    const history = [
+      {
+        role: "system",
+        content:
+          "You are a helpful assistant that can use tools.",
+          // "You are a helpful assistant that can use tools to get the weather and horoscope.",
+      },
+      {
+        role: "user",
+        content: "What's the weather in Tokyo?",
+      },
+    ];
+
+
+
+    console.log("\n🤖 AI Response:");
+    console.log("(Streaming with tool definitions in prompt)\n");
+
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+
+    history.push({
+      role: "user",
+      content: "if the weather in Tokyo is good, could you check my horoscope for Aquarius? ",
+    })
+
+    await chatSession({ modelId, history, tools: toolVariants[1], kvCache })
+
+
+    console.log("\n\n🎉 Completed!");
+    await unloadModel({ modelId, clearStorage: false });
+  } catch (error) {
+    console.error("❌ Error:", error);
+    process.exit(1);
+  }
+}
+// using same kvCache for a single session
+await runTest({ kvCache: `id-${Date.now()}`, toolVariants: [toolsAll, toolsAll] })
+await runTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2] })

From a9c66c13c8f97a1fca7324da369dfde3e41b4c97 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Fri, 27 Feb 2026 01:47:12 +0300
Subject: [PATCH 07/11] (internal) dynamic tools cache: enable log, skip checks

---
 packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts            | 2 ++
 .../server/bare/addons/llamacpp-completion/cache-logger.ts      | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
index 98d1e4b4d1..00a74e35bc 100644
--- a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
+++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
@@ -1,3 +1,5 @@
+/* eslint-disable */
+// @ts-nocheck
 import { z } from "zod";
 import {
   completion,
diff --git a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts
index 1790cd878f..b06d25cc84 100644
--- a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts
+++ b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts
@@ -25,7 +25,7 @@ function formatMessages(messages: ChatMessage[]): string {
 
 export function logCacheStatus(cacheKey: string, isReusing: boolean): void {
   const status = isReusing ? "REUSING" : "CREATING";
-  logger.debug(`[kv-cache] [${cacheKey}] ${status} cache`);
+  logger.info(`[kv-cache] [${cacheKey}] ${status} cache`);
 }
 
 export function logCacheInit(

From 5a9afc3a9dcd0791e44d71bffa13f114d161dec7 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Fri, 27 Feb 2026 23:51:12 +0300
Subject: [PATCH 08/11] (improvement) llamacpp-llm addon: qwen template dynamic
 tools

---
 .../addon/src/utils/QwenTemplate.cpp          |  26 +-
 .../examples/dynamicToolCalling.js            | 426 ++++++++++++++++++
 .../examples/llamacpp-dynamic-tools.ts        |   2 +-
 3 files changed, 438 insertions(+), 16 deletions(-)
 create mode 100644 packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js

diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp
index 70bb99a753..94d9ea4290 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp
@@ -4,21 +4,8 @@ namespace qvac_lib_inference_addon_llama {
 namespace utils {
 
 const char* getFixedQwen3Template() {
-  return R"({%- if tools %}
-    {{- '<|im_start|>system\n' }}
-    {%- if messages[0].role == 'system' %}
-        {{- messages[0].content + '\n\n' }}
-    {%- endif %}
-    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
-    {%- for tool in tools %}
-        {{- "\n" }}
-        {{- tool | tojson }}
-    {%- endfor %}
-    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
-{%- else %}
-    {%- if messages[0].role == 'system' %}
-        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
-    {%- endif %}
+  return R"({%- if messages[0].role == 'system' %}
+    {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
 {%- endif %}
 {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
 {%- for message in messages[::-1] %}
@@ -81,6 +68,15 @@ const char* getFixedQwen3Template() {
         {%- endif %}
     {%- endif %}
 {%- endfor %}
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- endif %}
 {%- if add_generation_prompt %}
     {{- '<|im_start|>assistant\n' }}
     {%- if enable_thinking is defined and enable_thinking is false %}
diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js
new file mode 100644
index 0000000000..a66f524d22
--- /dev/null
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js
@@ -0,0 +1,426 @@
+'use strict'
+
+const Corestore = require('corestore')
+const HyperDriveDL = require('@qvac/dl-hyperdrive')
+const LlmLlamacpp = require('../index')
+const { setLogger, releaseLogger } = require('../addonLogging')
+const process = require('bare-process')
+
+// Helper functions
+function createSeparator (char = '=', length = 80) { // divider line for console output
+  return char.repeat(length) // `char` repeated `length` times
+}
+
+// Returns the parsed JSON payload of every <tool_call>...</tool_call> block in
+// `response`; blocks whose payload is not valid JSON are left out.
+function extractToolCalls (response) {
+  const parsedCalls = []
+  const tagPattern = /<tool_call>([\s\S]*?)<\/tool_call>/g
+  for (const [, payload] of response.matchAll(tagPattern)) {
+    const jsonText = payload.trim()
+    try {
+      parsedCalls.push(JSON.parse(jsonText))
+    } catch (e) {
+      // Best effort: a malformed payload is ignored rather than reported
+    }
+  }
+  return parsedCalls
+}
+
+async function runQuery (model, query) {
+  // Streams one query; returns its name plus the tool calls found in the full text.
+  const heavyRule = createSeparator()
+  const lightRule = createSeparator('-')
+  console.log(`\n${heavyRule}`)
+  console.log(query.name)
+  console.log(heavyRule)
+  console.log('\nThinking and Response:')
+  console.log(lightRule)
+
+  const run = await model.run(query.prompt)
+  let collected = ''
+  const echoAndCollect = (piece) => {
+    process.stdout.write(piece)
+    collected += piece
+  }
+  await run.onUpdate(echoAndCollect).await()
+
+  console.log('\n')
+  console.log(lightRule)
+  console.log('\nFull Response:')
+  console.log(collected)
+  console.log(`\nInference Stats: ${JSON.stringify(run.stats, null, 2)}`)
+  console.log('\n')
+  return { name: query.name, toolCalls: extractToolCalls(collected) }
+}
+
+function printToolCallSummary (results) {
+  // Prints, per query, every extracted tool call as "<name> <json arguments>".
+  const rule = createSeparator()
+  console.log(`\n${rule}`)
+  console.log('Tool Call Summary')
+  console.log(rule)
+  results.forEach(({ name, toolCalls }) => {
+    console.log(`\n${name}:`)
+    if (toolCalls.length === 0) {
+      console.log('  No tool calls found')
+      return
+    }
+    toolCalls.forEach(call => console.log(`  ${call.name} ${JSON.stringify(call.arguments)}`))
+  })
+  console.log(`\n${rule}`)
+}
+
+async function main () {
+  console.log('Tool Calling Example: Demonstrates tool calling capabilities')
+  console.log('============================================================')
+
+  // IMPORTANT: Set up the logger FIRST, before creating any addon instances
+  console.log('Setting up C++ logger...')
+
+  setLogger((priority, message) => { // prints each addon log line with a timestamp and a priority label
+    const priorityNames = {
+      0: 'ERROR',
+      1: 'WARNING',
+      2: 'INFO',
+      3: 'DEBUG',
+      4: 'OFF'
+    }
+
+    const priorityName = priorityNames[priority] || 'UNKNOWN' // label for priorities missing from the table
+    const timestamp = new Date().toISOString()
+
+    console.log(`[${timestamp}] [C++ log] [${priorityName}]: ${message}`)
+  })
+
+  // 1. Initializing data loader
+  const store = new Corestore('./store')
+  const hdStore = store.namespace('hd')
+
+  const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9'
+  const hdDL = new HyperDriveDL({
+    key: `hd://${hdKey}`,
+    store: hdStore
+  })
+
+  // 2. Configuring model settings
+  const args = {
+    loader: hdDL,
+    opts: { stats: true },
+    logger: console,
+    modelName: 'Qwen3-1.7B-Q4_0.gguf',
+    diskPath: './models'
+  }
+
+  const config = {
+    device: 'gpu',
+    gpu_layers: '999',
+    ctx_size: '2048',
+    tools: 'true'
+  }
+
+  // 3. Loading model
+  await hdDL.ready()
+  const model = new LlmLlamacpp(args, config)
+  const closeLoader = true // first argument of model.load() below
+  let totalProgress = 0 // highest overallProgress printed so far
+  const reportProgressCallback = (report) => {
+    if (typeof report === 'object' && Number(report.overallProgress) > totalProgress) {
+      process.stdout.write(
+        `\r${report.overallProgress}%: ${report.action} [${report.filesProcessed}/${report.totalFiles}] ${report.currentFileProgress}% ${report.currentFile}`
+      )
+      if (Number(report.currentFileProgress) === 100) {
+        process.stdout.write('\n')
+      }
+      totalProgress = Number(report.overallProgress)
+    }
+  }
+  await model.load(closeLoader, reportProgressCallback)
+
+  try {
+    // 4. Defining tool queries with function schemas
+    const systemMessageAmbiguous = {
+      role: 'system',
+      content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.'
+    }
+
+    const tools1 = [
+      // Test handled by this function:
+      // - Multiple parameters with different types
+      // - Complex multiple tools with array parameters
+      {
+        type: 'function',
+        name: 'searchProducts',
+        description: 'Search products',
+        parameters: {
+          type: 'object',
+          properties: {
+            query: { type: 'string', description: 'Query' },
+            category: { type: 'string', enum: ['electronics', 'clothing', 'books'], description: 'Category' },
+            maxPrice: { type: 'number', minimum: 0, description: 'Max price' }
+          },
+          required: ['query']
+        },
+        handler: async ({ query, category, maxPrice }) => {
+          if (!query) throw new Error('searchProducts: invalid empty "query" field')
+          if (category !== 'electronics' || maxPrice !== 1000) {
+            return { data: '' }
+          }
+          return {
+            data: [
+              { productId: 'laptop-1', price: '$300' },
+              { productId: 'laptop-2', price: '$550' },
+              { productId: 'laptop-3', price: '$800' },
+            ]
+          }
+        },
+      },
+      // Test handled by this function:
+      // - Part of Complex multiple tools with array parameters test
+      {
+        type: 'function',
+        name: 'addToCart',
+        description: 'Add items to cart',
+        parameters: {
+          type: 'object',
+          properties: {
+            items: {
+              type: 'array',
+              items: {
+                type: 'object',
+                properties: {
+                  productId: { type: 'string', description: 'Product ID' },
+                  quantity: { type: 'integer', minimum: 1, description: 'Quantity' }
+                },
+                required: ['productId', 'quantity']
+              }
+            }
+          },
+          required: ['items']
+        }
+      },
+      // Test handled by this function:
+      // - Tool with boolean and optional parameters
+      // - Part of Complex multiple tools with nested object parameters test
+      {
+        type: 'function',
+        name: 'queryDB',
+        description: 'Query database',
+        parameters: {
+          type: 'object',
+          properties: {
+            table: { type: 'string', description: 'Table' },
+            conditions: {
+              type: 'object',
+              properties: {
+                field: { type: 'string', description: 'Field' },
+                operator: { type: 'string', enum: ['equals', 'greaterThan'], description: 'Operator' },
+                value: { type: 'string', description: 'Value' }
+              },
+              required: ['field', 'operator', 'value']
+            },
+            limit: { type: 'integer', minimum: 1, default: 10, description: 'Limit' },
+            includeMetadata: { type: 'boolean', default: false, description: 'Include metadata' }
+          },
+          required: ['table', 'conditions']
+        }
+      }
+
+    ]
+
+    const laptopPrompt = 'Search laptops under $1000 and add 2 items above $500 to a cart. Also, query users table age > 25 limit 50 with metadata.'
+
+    const toolsFirstQuery1 = [
+      systemMessageAmbiguous,
+      ...tools1,
+      {
+        role: 'user',
+        content: laptopPrompt,
+      }
+    ]
+    // NOTE(review): leftover sketch, presumably a tool-result shape — { toolCallId: call.id, result }
+    const toolsFirstQuery1_1 = [ // toolsFirstQuery1 followed by one more user turn
+      ...toolsFirstQuery1,
+      {
+        role: 'user',
+        content: 'Thank you!',
+      }
+    ]
+    const toolsLastQuery1 = [
+      systemMessageAmbiguous,
+      {
+        role: 'user',
+        content: laptopPrompt,
+      },
+      ...tools1
+    ]
+
+    const tools2 = [
+      // Test handled by this function:
+      // - Math/computation tool
+      {
+        type: 'function',
+        name: 'calculate',
+        description: 'Calculate math',
+        parameters: {
+          type: 'object',
+          properties: {
+            expression: { type: 'string', description: 'Expression' },
+            precision: { type: 'integer', minimum: 0, maximum: 10, default: 2, description: 'Precision' }
+          },
+          required: ['expression']
+        }
+      },
+      // Test handled by this function:
+      // - Invalid/ambiguous query
+      {
+        type: 'function',
+        name: 'calculateDistance',
+        description: 'Calculate distance between two coordinates',
+        parameters: {
+          type: 'object',
+          properties: {
+            lat1: { type: 'number', description: 'Latitude of point 1' },
+            lon1: { type: 'number', description: 'Longitude of point 1' },
+            lat2: { type: 'number', description: 'Latitude of point 2' },
+            lon2: { type: 'number', description: 'Longitude of point 2' }
+          },
+          required: ['lat1', 'lon1', 'lat2', 'lon2']
+        }
+      }
+    ]
+
+    const toolsFirstQuery2 = [
+      systemMessageAmbiguous,
+      ...tools2,
+      {
+        role: 'user',
+        content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
+      }
+    ]
+
+    const toolsLastQuery2 = [
+      systemMessageAmbiguous,
+      {
+        role: 'user',
+        content: 'calculate 156 * 23 precision 0. Also, How far is here from there?'
+      },
+      ...tools2
+    ]
+
+    const tools3 = [
+      // Test handled by this function:
+      // - Part of conversation context tool test
+      {
+        type: 'function',
+        name: 'getWeather',
+        description: 'Get weather forecast for a city',
+        parameters: {
+          type: 'object',
+          properties: {
+            city: { type: 'string', description: 'City name' },
+            date: { type: 'string', description: 'Date in YYYY-MM-DD' }
+          },
+          required: ['city', 'date']
+        }
+      },
+      // Test handled by this function:
+      // - Part of conversation context tool test
+      {
+        type: 'function',
+        name: 'createCalendarEvent',
+        description: 'Create a calendar event',
+        parameters: {
+          type: 'object',
+          properties: {
+            title: { type: 'string', description: 'Event title' },
+            date: { type: 'string', description: 'Event date (YYYY-MM-DD)' },
+            time: { type: 'string', description: 'Start time (HH:MM)' },
+            duration: { type: 'integer', description: 'Duration in minutes' }
+          },
+          required: ['title', 'date']
+        }
+      }
+    ]
+
+    const toolsFirstQuery3 = [
+      {
+        role: 'system',
+        content: 'You are a personal assistant.'
+      },
+      ...tools3,
+      {
+        role: 'user',
+        content: 'What is the weather in Seattle on April 10th?'
+      },
+      {
+        role: 'assistant',
+        content: 'Let me check that for you. Do you need hourly or just daily?'
+      },
+      {
+        role: 'user',
+        content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
+      }
+    ]
+
+    const toolsLastQuery3 = [
+      {
+        role: 'system',
+        content: 'You are a personal assistant.'
+      },
+      {
+        role: 'user',
+        content: 'What is the weather in Seattle on April 10th?'
+      },
+      {
+        role: 'assistant',
+        content: 'Let me check that for you. Do you need hourly or just daily?'
+      },
+      {
+        role: 'user',
+        content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.'
+      },
+      ...tools3
+    ]
+
+    // 5. Running tool calling queries
+    const queries = [ // only the two "tools first" Query 1 variants are enabled
+      { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 },
+      { name: 'Query 1 (tools first-thanks): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1_1 },
+      // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 },
+      // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 },
+      // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 },
+      // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 },
+      // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }
+    ]
+
+    const toolCallResults = []
+    for (const query of queries) {
+      const result = await runQuery(model, query)
+      toolCallResults.push(result)
+    }
+
+    // Print all tool calls together at the end
+    printToolCallSummary(toolCallResults)
+  } catch (error) { // logged here and not rethrown
+    const errorMessage = error?.message || error?.toString() || String(error)
+    console.error('Error occurred:', errorMessage)
+    console.error('Error details:', error)
+  } finally {
+    // 6. Cleaning up resources (runs on success and after a caught error)
+    await store.close()
+    await hdDL.close()
+    await model.unload()
+    releaseLogger()
+  }
+}
+
+main().catch(error => { // reached only by rejections that main() does not catch itself
+  console.error('Fatal error in main function:', {
+    error: error.message,
+    stack: error.stack,
+    timestamp: new Date().toISOString()
+  })
+  process.exit(1) // non-zero exit status
+})
+
diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
index 00a74e35bc..9cd6bb2526 100644
--- a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
+++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts
@@ -169,7 +169,7 @@ async function runTest({ kvCache, toolVariants }) {
   try {
     // Load model from provided file path with tools support enabled
     const modelId = await loadModel({
-      modelSrc: QWEN_3_1_7B_INST_Q4,
+      modelSrc: 'dynamic-tools_Qwen3-1.7B-Q4_0.gguf',// QWEN_3_1_7B_INST_Q4,
       modelType: "llm",
       modelConfig: {
         ctx_size: 4096,

From b17011de1fff0af9d51769712d1b9d504c98c518 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Mon, 2 Mar 2026 16:32:44 +0300
Subject: [PATCH 09/11] (draft) sdk server: completion dynamic tools option

---
 .../handlers/completion-stream.ts             | 25 ++++++++++++++++---
 .../server/bare/registry/model-registry.ts    |  1 +
 .../qvac-sdk/server/utils/tool-integration.ts |  7 ++++++
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts
index 793d33bc44..ef69e511e6 100644
--- a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts
+++ b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts
@@ -29,6 +29,7 @@ import {
   type AnyModel,
 } from "@/server/bare/registry/model-registry";
 import {
+  appendToolsToHistory,
   checkForToolEvents,
   insertToolsIntoHistory,
   setupToolGrammar,
@@ -160,13 +161,17 @@ function prepareMessagesForCache(
     content: string;
     attachments?: { path: string }[] | undefined;
   }[],
+  tools?: Tool[]
 ): ChatHistory[] {
+  const transformedTools = tools ? transformMessages(tools) : [];
   if (cacheExists && history.length > 0) {
     const lastMessage = history[history.length - 1];
     const lastTransformedMessages = transformMessage(lastMessage!);
+
     return [
       { role: "session", content: cachePathToUse },
       ...lastTransformedMessages,
+      ...transformedTools,
     ];
   }
 
@@ -177,13 +182,14 @@ function prepareMessagesForCache(
   return [
     { role: "session", content: cachePathToUse },
     ...transformedHistoryWithoutSystem,
+    ...transformedTools,
   ];
 }
 
 async function* processModelResponse(
   model: AnyModel,
   messagesToSend: ChatHistory[],
-  shouldSaveCache: boolean,
+  shouldSaveCache: boolean, // TODO: start here
   tools?: Tool[],
 ): AsyncGenerator<
   { token: string; toolCallEvent?: ToolCallEvent },
@@ -250,6 +256,10 @@ export async function* completion(
 
   const modelConfig = getModelConfig(modelId);
   const toolsEnabled = (modelConfig as { tools?: boolean }).tools === true;
+  // TODO: dynamicTools as a "Tool[]" param
+  const dynamicTools = toolsEnabled && (
+    (modelConfig as { dynamicTools?: boolean }).dynamicTools === true
+  );
 
   let historyWithTools: Array<
     | {
@@ -261,7 +271,11 @@ export async function* completion(
   > = history;
 
   if (tools && tools.length > 0 && toolsEnabled) {
-    historyWithTools = insertToolsIntoHistory(history, tools);
+    if (dynamicTools) {
+      historyWithTools = appendToolsToHistory(history, tools)
+    } else {
+      historyWithTools = insertToolsIntoHistory(history, tools);
+    }
     setupToolGrammar(modelConfig as Record<string, unknown>, tools);
   }
 
@@ -291,7 +305,7 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           kvCache,
-          tools && toolsEnabled ? tools : undefined,
+          (tools && toolsEnabled && !dynamicTools) ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, kvCache);
         cacheExists = true;
@@ -301,6 +315,7 @@ export async function* completion(
         cachePathToUse,
         cacheExists,
         history,
+        (tools && toolsEnabled && dynamicTools) ? tools : undefined,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 
@@ -338,7 +353,7 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           "auto",
-          tools && toolsEnabled ? tools : undefined,
+          undefined, //tools && toolsEnabled ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, currentCacheInfo.cacheKey);
         cacheExists = true;
@@ -348,6 +363,8 @@ export async function* completion(
         cachePathToUse,
         cacheExists,
         history,
+        // (tools && toolsEnabled && dynamicTools) ? tools : undefined,
+        tools,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 
diff --git a/packages/qvac-sdk/server/bare/registry/model-registry.ts b/packages/qvac-sdk/server/bare/registry/model-registry.ts
index 3e9c0d187b..21aa6ea3e2 100644
--- a/packages/qvac-sdk/server/bare/registry/model-registry.ts
+++ b/packages/qvac-sdk/server/bare/registry/model-registry.ts
@@ -81,6 +81,7 @@ export function registerModel(
       `Delegated model registered: ${id} -> topic: ${topic}, provider: ${providerPublicKey}, timeout: ${timeout}ms`,
     );
   } else {
+    // TODO: modelRegistry.set - "config.tools"?
     modelRegistry.set(id, {
       id,
       isDelegated: false,
diff --git a/packages/qvac-sdk/server/utils/tool-integration.ts b/packages/qvac-sdk/server/utils/tool-integration.ts
index 905a6c44bc..96b2a633e3 100644
--- a/packages/qvac-sdk/server/utils/tool-integration.ts
+++ b/packages/qvac-sdk/server/utils/tool-integration.ts
@@ -27,6 +27,13 @@ export function insertToolsIntoHistory(
   return [...tools, ...history];
 }
 
+export function appendToolsToHistory(
+  history: HistoryMessage[],
+  tools: Tool[],
+): Array<HistoryMessage | Tool> {
+  return [...history, ...tools]; // tools go after the messages; insertToolsIntoHistory puts them first
+}
+
 export function setupToolGrammar(
   modelConfig: Record<string, unknown>,
   tools: Tool[],

From 933c96e6fcd94d482501504045d85e23e0e5ade6 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Tue, 3 Mar 2026 00:08:40 +0300
Subject: [PATCH 10/11] (internal) llamacpp-llm: tools tests imp with cache,
 verbose

---
 .../addon/src/utils/ChatTemplateUtils.cpp           |  2 ++
 .../examples/dynamicToolCalling.js                  | 13 +++++++++----
 packages/qvac-lib-infer-llamacpp-llm/index.js       |  2 +-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp
index 952b49ed53..72a75aa439 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp
@@ -68,6 +68,8 @@ std::string getChatTemplateForModel(
 
   // For Qwen3 models, use the fixed template
   if (isQwen3Model(model)) {
+    QLOG_IF(
+        Priority::ERROR, "[ChatTemplateUtils] Using CHANGED Qwen3 template\n");
     return getFixedQwen3Template();
   }
 
diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js
index a66f524d22..53f3485390 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js
@@ -109,14 +109,18 @@ async function main () {
     opts: { stats: true },
     logger: console,
     modelName: 'Qwen3-1.7B-Q4_0.gguf',
-    diskPath: './models'
+    diskPath: './models',
+    modelConfig: {
+      kvCache: true,
+    }
   }
 
   const config = {
     device: 'gpu',
     gpu_layers: '999',
-    ctx_size: '2048',
-    tools: 'true'
+    ctx_size: '4096',
+    verbosity: '3',
+    tools: 'true',
   }
 
   // 3. Loading model
@@ -228,7 +232,8 @@ async function main () {
 
     ]
 
-    const laptopPrompt = 'Search laptops under $1000 and add 2 items above $500 to a cart. Also, query users table age > 25 limit 50 with metadata.'
+    const laptopPrompt = 'Search laptops under $1000 and then, query users table age > 25 limit 50 with metadata. After if there are any users older than 25, add the cheapest found prev laptop to the cart'
+    // const laptopPrompt = 'Search laptops under $1000 and then - from those returned by the search, need to add 2 items, cheapest but in $500-$1000 range, to a cart. Also, query users table age > 25 limit 50 with metadata.'
 
     const toolsFirstQuery1 = [
       systemMessageAmbiguous,
diff --git a/packages/qvac-lib-infer-llamacpp-llm/index.js b/packages/qvac-lib-infer-llamacpp-llm/index.js
index 278752e52f..5efd48fe4b 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/index.js
+++ b/packages/qvac-lib-infer-llamacpp-llm/index.js
@@ -105,7 +105,7 @@ class LlmLlamacpp extends BaseInference {
    * @returns {Addon} The instantiated addon interface
    */
   _createAddon (configurationParams) {
-    this.logger.info(
+    this.logger.error(
       'Creating Llama interface with configuration:',
       configurationParams
     )

From 2c2b518d85d217072553b7e17a2eb3f3c083a489 Mon Sep 17 00:00:00 2001
From: Mikhail Sotnikov <mialsot@gmail.com>
Date: Tue, 3 Mar 2026 02:25:25 +0300
Subject: [PATCH 11/11] (internal) llamacpp-llm: cache dynamic tools impl

---
 .../src/model-interface/CacheManager.cpp      |  40 +
 .../addon/src/model-interface/LlamaModel.cpp  |   4 +-
 .../addon/src/model-interface/LlmContext.hpp  |   8 +
 .../src/model-interface/MtmdLlmContext.cpp    |   2 +
 .../src/model-interface/MtmdLlmContext.hpp    |   7 +
 .../src/model-interface/TextLlmContext.cpp    |  22 +
 .../src/model-interface/TextLlmContext.hpp    |  18 +
 .../test/unit/CMakeLists.txt                  |   1 +
 .../test/unit/test_tool_token_cache.cpp       | 761 ++++++++++++++++++
 9 files changed, 862 insertions(+), 1 deletion(-)
 create mode 100644 packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp

diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp
index ceab3c95ed..33b8d86f8e 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp
@@ -177,6 +177,26 @@ bool CacheManager::loadCache() {
           "%s: attempting to load saved session from '%s'\n",
           __func__,
           sessionPath_.c_str()));
+
+  // Remove tool tokens from KV cache before loading the saved session
+  llama_pos toolTokenCount = llmContext_->getLastToolTokenCount();
+  if (toolTokenCount > 0) {
+    auto* mem = llama_get_memory(ctx);
+    llama_pos currentPast = llmContext_->getNPast();
+    llama_pos newNPast = currentPast - toolTokenCount;
+
+    if (newNPast > 0) {
+      llama_memory_seq_rm(mem, -1, newNPast, -1);
+      llmContext_->setNPast(newNPast);
+
+      QLOG_IF(
+          Priority::DEBUG,
+          string_format(
+              "%s: removed %d tool tokens before saving cache\n",
+              __func__,
+              toolTokenCount));
+    }
+  }
   if (!isFileInitialized(sessionPath_)) {
     QLOG_IF(
         Priority::DEBUG,
@@ -244,6 +264,26 @@ void CacheManager::saveCache() {
           __func__,
           sessionPath_.c_str()));
 
+  // Remove tool tokens from KV cache before saving
+  llama_pos toolTokenCount = llmContext_->getLastToolTokenCount();
+  if (toolTokenCount > 0) {
+    auto* mem = llama_get_memory(ctx);
+    llama_pos currentPast = llmContext_->getNPast();
+    llama_pos newNPast = currentPast - toolTokenCount;
+
+    if (newNPast > 0) {
+      llama_memory_seq_rm(mem, -1, newNPast, -1);
+      llmContext_->setNPast(newNPast);
+
+      QLOG_IF(
+          Priority::DEBUG,
+          string_format(
+              "%s: removed %d tool tokens before saving cache\n",
+              __func__,
+              toolTokenCount));
+    }
+  }
+
   llama_token sessionTokens[2] = {
       static_cast<llama_token>(llmContext_->getNPast()),
       static_cast<llama_token>(llmContext_->getFirstMsgTokens())};
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp
index 278298c381..a455a90002 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp
@@ -667,7 +667,9 @@ std::unique_ptr<LlmContext> LlamaModel::CreateContext(
     return std::make_unique<MtmdLlmContext>(params, std::move(llamaInit));
   }
   isTextLlm = true;
-  return std::make_unique<TextLlmContext>(params, std::move(llamaInit));
+  auto ctx = std::make_unique<TextLlmContext>(params, std::move(llamaInit));
+  ctx->setCalculateToolTokenCount(params.use_jinja);
+  return ctx;
 }
 
 bool LlamaModel::LoadMedia(const LlamaModel::Input& input) {
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp
index c997566aa0..75a2571582 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp
@@ -174,6 +174,14 @@ class LlmContext { // NOLINT(cppcoreguidelines-special-member-functions)
    */
   virtual llama_pos removeLastNTokens(llama_pos count) = 0;
 
+  /**
+   * Get the number of tool tokens counted for the most recently tokenized
+   * prompt. Used for cache management when tools are appended after user
+   * messages.
+   * @return - the number of tool tokens.
+   */
+  [[nodiscard]] virtual llama_pos getLastToolTokenCount() const = 0;
+
   /**
    * The reset media method. It resets the media.
    *
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp
index 267531d2f4..582e436f5e 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp
@@ -421,6 +421,8 @@ void MtmdLlmContext::setNPast(llama_pos nPast) { this->n_past = nPast; }
 
 llama_pos MtmdLlmContext::getFirstMsgTokens() const { return firstMsgTokens; }
 
+llama_pos MtmdLlmContext::getLastToolTokenCount() const { return 0; }
+
 void MtmdLlmContext::setFirstMsgTokens(llama_pos firstMsgTokens) {
   this->firstMsgTokens = firstMsgTokens;
 }
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp
index 4831373dee..322c289bfa 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp
@@ -102,6 +102,13 @@ class MtmdLlmContext: public LlmContext {
    */
   void setFirstMsgTokens(llama_pos firstMsgTokens) override;
 
+  /**
+   * The get last tool token count method. It returns 0 for multimodal context.
+   *
+   * @return - 0 (not applicable for multimodal).
+   */
+  [[nodiscard]] llama_pos getLastToolTokenCount() const override;
+
   /**
    * The set n_discarded method. It sets the n_discarded.
    *
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp
index db76071c51..18a273a08f 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp
@@ -228,6 +228,22 @@ void TextLlmContext::tokenizeChat(
         AddonID, toString(EmptyTokenizedInput), errorMsg);
   }
 
+  // Calculate tool token count by tokenizing without tools
+  lastToolTokenCount_ = 0;
+  if (calculateToolTokenCount_ && !tools.empty()) {
+    common_chat_templates_inputs inputsNoTools = inputs;
+    inputsNoTools.tools = {};
+    std::string promptNoTools = getPrompt(tmpls.get(), inputsNoTools);
+    std::vector<llama_token> tokensNoTools =
+        common_tokenize(lctx, promptNoTools, addSpecial, true);
+
+    if (!tokensNoTools.empty() && !inputTokens.empty() &&
+        inputTokens.size() > tokensNoTools.size()) {
+      lastToolTokenCount_ = static_cast<llama_pos>(
+          inputTokens.size() - tokensNoTools.size());
+    }
+  }
+
   // Encode the input if model has encoder
   if (llama_model_has_encoder(model) && n_past == 0 && !isCacheLoaded) {
     int encInputSize = static_cast<int>(inputTokens.size());
@@ -508,6 +524,12 @@ void TextLlmContext::setNPast(llama_pos nPast) { this->n_past = nPast; }
 
 llama_pos TextLlmContext::getFirstMsgTokens() const { return firstMsgTokens; }
 
+llama_pos TextLlmContext::getLastToolTokenCount() const { return lastToolTokenCount_; }
+
+void TextLlmContext::setCalculateToolTokenCount(bool enabled) {
+  this->calculateToolTokenCount_ = enabled;
+}
+
 void TextLlmContext::setFirstMsgTokens(llama_pos firstMsgTokens) {
   this->firstMsgTokens = firstMsgTokens;
 }
diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp
index d4c004c348..7a515f821c 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp
+++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp
@@ -94,6 +94,22 @@ class TextLlmContext: public LlmContext {
    * @param first_msg_tokens - the first msg tokens.
    */
   void setFirstMsgTokens(llama_pos firstMsgTokens) override;
+
+  /**
+   * The get last tool token count method. It returns how many prompt tokens
+   * the tool definitions added in the most recent tokenizeChat call.
+   *
+   * @return - the tool token count; 0 when no tools or calculation disabled.
+   */
+  [[nodiscard]] llama_pos getLastToolTokenCount() const override;
+
+  /**
+   * The set calculate tool token count method. It toggles the extra
+   * no-tools tokenization pass in tokenizeChat (enabled by default).
+   *
+   * @param enabled - whether to calculate tool token count.
+   */
+  void setCalculateToolTokenCount(bool enabled);
   /**
    * The set n_discarded method. It sets the n_discarded.
    *
@@ -157,6 +173,8 @@ class TextLlmContext: public LlmContext {
   llama_pos n_past = 0;           // NOLINT(readability-identifier-naming)
   llama_pos n_discarded = 0;      // NOLINT(readability-identifier-naming)
   llama_pos firstMsgTokens = 0;   // NOLINT(readability-identifier-naming)
+  llama_pos lastToolTokenCount_ = 0;  // NOLINT(readability-identifier-naming)
+  bool calculateToolTokenCount_ = true;  // NOLINT(readability-identifier-naming)
   ThreadPoolPtr threadpool;       // NOLINT(readability-identifier-naming)
   ThreadPoolPtr threadpool_batch; // NOLINT(readability-identifier-naming)
 
diff --git a/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt b/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt
index 8226cf3537..ef4c4ae83e 100644
--- a/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt
+++ b/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt
@@ -13,6 +13,7 @@ add_executable(
   # Implemented tests
   test_llama_model.cpp
   test_cache_management.cpp
+  test_tool_token_cache.cpp
   test_text_llm_context.cpp
   # Backend selection tests
   test_backend_selection.cpp
diff --git a/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp b/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp
new file mode 100644
index 0000000000..07235baedc
--- /dev/null
+++ b/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp
@@ -0,0 +1,761 @@
+#include <filesystem>
+#include <iostream>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <unordered_map>
+#include <variant>
+
+#include <gtest/gtest.h>
+#include <qvac-lib-inference-addon-cpp/RuntimeStats.hpp>
+
+#include "model-interface/LlamaModel.hpp"
+#include "test_common.hpp"
+
+namespace fs = std::filesystem;
+
+namespace {
+// Returns the stat stored under `key` as a double, or 0.0 when it is absent.
+double getStatValue(
+    const qvac_lib_inference_addon_cpp::RuntimeStats& stats,
+    const std::string& key) {
+  const auto toDouble = [](const auto& value) -> double {
+    using ValueType = std::decay_t<decltype(value)>;
+    if constexpr (std::is_same_v<ValueType, double>) {
+      return value;
+    } else {
+      return static_cast<double>(value);
+    }
+  };
+  for (const auto& entry : stats) {
+    if (entry.first != key) {
+      continue;
+    }
+    return std::visit(toDouble, entry.second);
+  }
+  return 0.0;
+}
+} // namespace
+
+// Fixture: builds the model config and clears leftover session files.
+class ToolTokenCacheTest : public ::testing::Test {
+protected:
+  void SetUp() override {
+    config_files["device"] = test_common::getTestDevice();
+    config_files["ctx_size"] = "2048";
+    config_files["gpu_layers"] = test_common::getTestGpuLayers();
+    config_files["n_predict"] = "10";
+
+    fs::path basePath{"models/unit-test"};
+    if (fs::exists(fs::path{"../../../models/unit-test"})) {
+      basePath = fs::path{"../../../models/unit-test"};
+    }
+
+    // Prefer the Llama model, then the generic test model; otherwise keep
+    // the bare file name, which is resolved against the working directory.
+    test_model_path = "Llama-3.2-1B-Instruct-Q4_0.gguf";
+    for (const char* candidate :
+         {"Llama-3.2-1B-Instruct-Q4_0.gguf", "test_model.gguf"}) {
+      const fs::path modelPath = basePath / candidate;
+      if (fs::exists(modelPath)) {
+        test_model_path = modelPath.string();
+        break;
+      }
+    }
+
+    fs::path backendDir;
+#ifdef TEST_BINARY_DIR
+    backendDir = fs::path(TEST_BINARY_DIR);
+#else
+    backendDir = fs::current_path() / "build" / "test" / "unit";
+#endif
+    config_files["backendsDir"] = backendDir.string();
+
+    session_with_tools_path = "test_session_with_tools.bin";
+    session_after_tools_path = "test_session_after_tools.bin";
+    // Drop session files left by an aborted run (TearDown never ran).
+    removeSessionFiles();
+  }
+
+  void TearDown() override { removeSessionFiles(); }
+
+  // Deletes both session files; fs::remove is a no-op for missing files.
+  void removeSessionFiles() const {
+    for (const auto& session_file :
+         {session_with_tools_path, session_after_tools_path}) {
+      fs::remove(session_file);
+    }
+  }
+
+  bool hasValidModel() const { return fs::exists(test_model_path); }
+
+  std::unique_ptr<LlamaModel> createModel() {
+    if (!hasValidModel()) {
+      return nullptr;
+    }
+    auto model = std::make_unique<LlamaModel>(
+        test_model_path, test_projection_path, config_files);
+    model->waitForLoadInitialization();
+    if (!model->isLoaded()) {
+      return nullptr;
+    }
+    return model;
+  }
+
+  std::unordered_map<std::string, std::string> config_files;
+  std::string test_model_path;
+  std::string test_projection_path;
+  std::string session_with_tools_path;
+  std::string session_after_tools_path;
+};
+
+TEST_F(ToolTokenCacheTest, CacheWithToolsBasic) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is the weather?"},
+    {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
+  ])";
+
+  // Inference with one tool: process and stats retrieval must not throw.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+    static_cast<void>(model->runtimeStats());
+  });
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    const std::string saveOutput = model->process(saveInput);
+    EXPECT_TRUE(saveOutput.empty());
+  });
+
+  EXPECT_TRUE(fs::exists(session_with_tools_path));
+}
+
+TEST_F(ToolTokenCacheTest, CachePersistenceWithTools) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model1 = createModel();
+  if (!model1) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is bitcoin? Answer briefly."},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // First model: one turn with a tool; the reply text is not asserted.
+  EXPECT_NO_THROW({
+    model1->process(inputWithTools);
+  });
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    const std::string saveOutput = model1->process(saveInput);
+    EXPECT_TRUE(saveOutput.empty());
+  });
+
+  auto statsBefore = model1->runtimeStats();
+  double cacheTokensBefore = getStatValue(statsBefore, "CacheTokens");
+  EXPECT_GT(cacheTokensBefore, 0.0);
+
+  model1.reset();
+
+  auto model2 = createModel();
+  if (!model2) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputAfterCache = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What did I ask about?"}
+  ])";
+
+  // Fresh model instance, same session name as the file saved above.
+  EXPECT_NO_THROW({
+    model2->process(inputAfterCache);
+  });
+
+  auto statsAfter = model2->runtimeStats();
+  double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens");
+  EXPECT_GT(cacheTokensAfter, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, CacheWithToolsThenWithout) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is bitcoin?"},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // First turn carries a tool definition; the reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  std::string saveInput1 =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput1);
+  });
+
+  std::string inputAfterCache = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is ethereum?"}
+  ])";
+
+  // Second turn in the same session, this time without any tool.
+  EXPECT_NO_THROW({
+    model->process(inputAfterCache);
+  });
+
+  auto stats = model->runtimeStats();
+  double cacheTokens = getStatValue(stats, "CacheTokens");
+  EXPECT_GT(cacheTokens, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, MultipleToolCallsInCache) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string input1 = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Search laptops under $1000"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // First turn with the searchProducts tool; reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(input1);
+  });
+
+  std::string saveInput1 =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput1);
+  });
+
+  std::string input2 = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Now search for phones"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // Second turn repeats the same tool definition in the same session.
+  EXPECT_NO_THROW({
+    model->process(input2);
+  });
+
+  auto stats = model->runtimeStats();
+  double cacheTokens = getStatValue(stats, "CacheTokens");
+  EXPECT_GT(cacheTokens, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, CacheResetClearsToolTokens) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is bitcoin?"},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // One turn with a tool before the reset; reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  std::string resetInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "reset"}])";
+  EXPECT_NO_THROW({
+    const std::string resetOutput = model->process(resetInput);
+    EXPECT_TRUE(resetOutput.empty());
+  });
+
+  auto statsAfterReset = model->runtimeStats();
+  double cacheTokensAfterReset = getStatValue(statsAfterReset, "CacheTokens");
+  EXPECT_EQ(cacheTokensAfterReset, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, SaveAfterToolInference) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is blockchain? Answer briefly."},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // One turn with a tool before saving; reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  auto statsBeforeSave = model->runtimeStats();
+  EXPECT_GT(getStatValue(statsBeforeSave, "promptTokens"), 0.0);
+  EXPECT_GT(getStatValue(statsBeforeSave, "CacheTokens"), 0.0);
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    const std::string saveOutput = model->process(saveInput);
+    EXPECT_TRUE(saveOutput.empty());
+  });
+
+  EXPECT_TRUE(fs::exists(session_with_tools_path));
+
+  model.reset();
+
+  auto model2 = createModel();
+  if (!model2) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string reloadInput = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Continue"}
+  ])";
+
+  // Fresh model instance, same session name as the file saved above.
+  EXPECT_NO_THROW({
+    model2->process(reloadInput);
+  });
+
+  auto statsAfterReload = model2->runtimeStats();
+  double cacheTokensAfterReload = getStatValue(statsAfterReload, "CacheTokens");
+  EXPECT_GT(cacheTokensAfterReload, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, MultipleToolsInSingleMessage) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithMultipleTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Search laptops and add to cart"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}},
+    {"type": "function", "name": "addToCart", "description": "Add items to cart", "parameters": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "string"}}}, "required": ["items"]}}
+  ])";
+
+  // Two tools in one message: inference and stats retrieval must not throw;
+  // the generated text itself is model-dependent and not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithMultipleTools);
+    static_cast<void>(model->runtimeStats());
+  });
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput);
+  });
+
+  EXPECT_TRUE(fs::exists(session_with_tools_path));
+}
+
+TEST_F(ToolTokenCacheTest, CacheLoadAfterTools) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model1 = createModel();
+  if (!model1) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string firstInput = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Hello"}
+  ])";
+
+  // First model: plain turn without tools; reply text is not asserted.
+  EXPECT_NO_THROW({
+    model1->process(firstInput);
+  });
+
+  std::string saveInput1 =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model1->process(saveInput1);
+  });
+
+  model1.reset();
+
+  auto model2 = createModel();
+  if (!model2) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "What is ethereum?"},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // Second model: same session name, now with a tool definition.
+  EXPECT_NO_THROW({
+    model2->process(inputWithTools);
+  });
+
+  auto stats = model2->runtimeStats();
+  double cacheTokens = getStatValue(stats, "CacheTokens");
+  EXPECT_GT(cacheTokens, 0.0);
+}
+
+TEST_F(ToolTokenCacheTest, ToolCacheEdgeCaseEmptyTools) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputNoTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Hello, how are you?"}
+  ])";
+
+  // No tools at all: inference and stats retrieval must not throw; the
+  // generated text itself is model-dependent and not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputNoTools);
+    static_cast<void>(model->runtimeStats());
+  });
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput);
+  });
+
+  EXPECT_TRUE(fs::exists(session_with_tools_path));
+}
+
+TEST_F(ToolTokenCacheTest, CacheWithUpdatedToolsSubsequentPrompt) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithFirstTool = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Search for laptops"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  // First turn offers only searchProducts; reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithFirstTool);
+  });
+
+  auto stats1 = model->runtimeStats();
+  double cacheTokens1 = getStatValue(stats1, "CacheTokens");
+  double promptTokens1 = getStatValue(stats1, "promptTokens");
+  EXPECT_GT(cacheTokens1, 0.0) << "Cache should have tokens after first prompt";
+  EXPECT_GT(promptTokens1, 0.0) << "Should have prompt tokens";
+
+  std::string saveInput1 =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput1);
+  });
+
+  std::string inputWithUpdatedTool = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Now search for phones"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}},
+    {"type": "function", "name": "getPrice", "description": "Get price", "parameters": {"type": "object", "properties": {"item": {"type": "string"}}, "required": ["item"]}}
+  ])";
+
+  // Second turn offers searchProducts plus the new getPrice tool.
+  EXPECT_NO_THROW({
+    model->process(inputWithUpdatedTool);
+  });
+
+  auto stats2 = model->runtimeStats();
+  double cacheTokens2 = getStatValue(stats2, "CacheTokens");
+  double promptTokens2 = getStatValue(stats2, "promptTokens");
+  EXPECT_GT(cacheTokens2, 0.0) << "Cache should have tokens after second prompt with updated tools";
+  EXPECT_GT(promptTokens2, 0.0) << "Should have prompt tokens";
+}
+
+TEST_F(ToolTokenCacheTest, CachePersistsWithDifferentToolsAfterSave) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model1 = createModel();
+  if (!model1) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputFirst = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Search for books"},
+    {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  EXPECT_NO_THROW({
+    model1->process(inputFirst);
+  });
+
+  auto stats1 = model1->runtimeStats();
+  double cacheTokens1 = getStatValue(stats1, "CacheTokens");
+  EXPECT_GT(cacheTokens1, 0.0);
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model1->process(saveInput);
+  });
+
+  model1.reset();
+
+  auto model2 = createModel();
+  if (!model2) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithDifferentTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Now find movies"},
+    {"type": "function", "name": "searchMovies", "description": "Search movies", "parameters": {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}}
+  ])";
+
+  // Second model: same session name, but a different tool (searchMovies).
+  EXPECT_NO_THROW({
+    model2->process(inputWithDifferentTools);
+  });
+
+  auto statsAfter = model2->runtimeStats();
+  double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens");
+  EXPECT_GT(cacheTokensAfter, 0.0) << "Cache should have tokens after loading with different tools";
+}
+
+TEST_F(ToolTokenCacheTest, CacheTokensIncreaseWithMultiplePrompts) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::vector<std::string> prompts = {
+    R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "Hello"}, {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}])",
+    R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "How are you?"}])",
+    R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "What's the weather?"}])"
+  };
+
+  // NOTE(review): despite the test name, growth between prompts is not
+  // asserted; each turn only has to leave a non-empty cache.
+  for (size_t i = 0; i < prompts.size(); ++i) {
+    EXPECT_NO_THROW({
+      model->process(prompts[i]);
+    });
+
+    auto stats = model->runtimeStats();
+    double cacheTokens = getStatValue(stats, "CacheTokens");
+    EXPECT_GT(cacheTokens, 0.0) << "Cache should have tokens after prompt " << i + 1;
+  }
+}
+
+TEST_F(ToolTokenCacheTest, CacheWithNoToolsThenWithTools) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputNoTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Hello, what's the weather?"}
+  ])";
+
+  // First turn without tools; the reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputNoTools);
+  });
+
+  auto stats1 = model->runtimeStats();
+  EXPECT_GT(getStatValue(stats1, "CacheTokens"), 0.0);
+
+  std::string inputWithTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Now get weather for New York"},
+    {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
+  ])";
+
+  // Second turn adds the getWeather tool to the same session.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  auto stats2 = model->runtimeStats();
+  double cacheTokens2 = getStatValue(stats2, "CacheTokens");
+  EXPECT_GT(cacheTokens2, 0.0) << "Cache should have tokens after adding tools";
+}
+
+TEST_F(ToolTokenCacheTest, CacheTokenCountVerification) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "user", "content": "What is the weather?"},
+    {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}}
+  ])";
+
+  // First turn has no session entry; the reply text is not asserted.
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  auto stats = model->runtimeStats();
+  double cacheTokens = getStatValue(stats, "CacheTokens");
+  double promptTokens = getStatValue(stats, "promptTokens");
+  double evalTokens = getStatValue(stats, "evalTokens");
+
+  std::cout << "=== First prompt with tools ===" << std::endl;
+  std::cout << "CacheTokens: " << cacheTokens << std::endl;
+  std::cout << "promptTokens: " << promptTokens << std::endl;
+  std::cout << "evalTokens: " << evalTokens << std::endl;
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW({
+    model->process(saveInput);
+  });
+
+  std::string inputAfter = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "And for Paris?"}
+  ])";
+
+  // Second turn names the session that was saved above.
+  EXPECT_NO_THROW({
+    model->process(inputAfter);
+  });
+
+  auto statsAfter = model->runtimeStats();
+  double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens");
+  double promptTokensAfter = getStatValue(statsAfter, "promptTokens");
+  double evalTokensAfter = getStatValue(statsAfter, "evalTokens");
+
+  std::cout << "=== Second prompt (after cache load) ===" << std::endl;
+  std::cout << "CacheTokens: " << cacheTokensAfter << std::endl;
+  std::cout << "promptTokens: " << promptTokensAfter << std::endl;
+  std::cout << "evalTokens: " << evalTokensAfter << std::endl;
+  EXPECT_EQ(cacheTokens, 0.0) << "First prompt (no session) has no cache";
+  EXPECT_GT(promptTokens, 0.0) << "First prompt should have prompt tokens";
+  EXPECT_GT(cacheTokensAfter, 0.0) << "Second prompt should load from cache";
+  EXPECT_GT(promptTokensAfter, 0.0) << "Second prompt should have new tokens too";
+}
+
+TEST_F(ToolTokenCacheTest, CacheWithToolsResetBehavior) {
+  if (!hasValidModel()) {
+    GTEST_SKIP() << "Test model not found";
+  }
+
+  auto model = createModel();
+  if (!model) {
+    GTEST_SKIP() << "Model failed to load";
+  }
+
+  std::string inputWithTools = R"([
+    {"role": "user", "content": "Search for laptops"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  EXPECT_NO_THROW({
+    model->process(inputWithTools);
+  });
+
+  auto statsBefore = model->runtimeStats();
+  double cacheBefore = getStatValue(statsBefore, "CacheTokens");
+  std::cout << "=== After first tool prompt ===" << std::endl;
+  std::cout << "CacheTokens: " << cacheBefore << std::endl;
+
+  std::string saveInput =
+      R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])";
+  EXPECT_NO_THROW(model->process(saveInput));
+
+  std::string inputSameTools = R"([
+    {"role": "session", "content": "test_session_with_tools.bin"},
+    {"role": "user", "content": "Search for phones"},
+    {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}
+  ])";
+
+  EXPECT_NO_THROW({
+    model->process(inputSameTools);
+  });
+
+  auto statsSameTools = model->runtimeStats();
+  double cacheSameTools = getStatValue(statsSameTools, "CacheTokens");
+  std::cout << "=== After second prompt with same tools ===" << std::endl;
+  std::cout << "CacheTokens: " << cacheSameTools << std::endl;
+  EXPECT_GT(cacheSameTools, 0.0);
+}