From c90e0ae2d230761dde794560d399712440afc5de Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Wed, 11 Feb 2026 05:26:49 +0300 Subject: [PATCH 01/11] (experiment) llm tools: position before user prompt and after --- .../examples/toolCalling.js | 79 ++++++++++++++++--- 1 file changed, 66 insertions(+), 13 deletions(-) diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js index 084c8d326d..c4afe814b4 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js @@ -125,8 +125,7 @@ async function main () { content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' } - const toolQuery1 = [ - systemMessageAmbiguous, + const tools1 = [ // Test handled by this function: // - Multiple parameters with different types // - Complex multiple tools with array parameters @@ -194,14 +193,27 @@ async function main () { required: ['table', 'conditions'] } }, + + ] + + const toolsFirstQuery1 = [ + systemMessageAmbiguous, + ...tools1, { role: 'user', content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' - } + }, ] - - const toolQuery2 = [ + const toolsLastQuery1 = [ systemMessageAmbiguous, + { + role: 'user', + content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' + }, + ...tools1, + ] + + const tools2 = [ // Test handled by this function: // - Math/computation tool { @@ -234,17 +246,27 @@ async function main () { required: ['lat1', 'lon1', 'lat2', 'lon2'] } }, + ] + + const toolsFirstQuery2 = [ + systemMessageAmbiguous, + ...tools2, { role: 'user', content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' - } + }, ] - const toolQuery3 = [ + const toolsLastQuery2 = [ + systemMessageAmbiguous, { - role: 'system', - content: 'You are a personal assistant.' + role: 'user', + content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' }, + ...tools2, + ] + + const tools3 = [ // Test handled by this function: // - Part of conversation context tool test { @@ -277,6 +299,14 @@ async function main () { required: ['title', 'date'] } }, + ] + + const toolsFirstQuery3 = [ + { + role: 'system', + content: 'You are a personal assistant.' + }, + ...tools3, { role: 'user', content: 'What is the weather in Seattle on April 10th?' @@ -288,14 +318,37 @@ async function main () { { role: 'user', content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' - } + }, + ] + + const toolsLastQuery3 = [ + { + role: 'system', + content: 'You are a personal assistant.' + }, + { + role: 'user', + content: 'What is the weather in Seattle on April 10th?' + }, + { + role: 'assistant', + content: 'Let me check that for you. Do you need hourly or just daily?' + }, + { + role: 'user', + content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' + }, + ...tools3, ] // 5. Running tool calling queries const queries = [ - { name: 'Query 1: Complex tool calling with multiple parameters', prompt: toolQuery1 }, - { name: 'Query 2: Math calculation and ambiguous query', prompt: toolQuery2 }, - { name: 'Query 3: Conversation context with tools', prompt: toolQuery3 } + { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 }, + { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, + { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, + { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, + { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, + { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }, ] const toolCallResults = [] From 31bd802beea80c853a222d0ee4378ccf4248d98c Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Wed, 11 Feb 2026 05:35:55 +0300 Subject: [PATCH 02/11] (chore) linter auto-fix --- .../examples/toolCalling.js | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js index c4afe814b4..b124fcea23 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js @@ -192,7 +192,7 @@ async function main () { }, required: ['table', 'conditions'] } - }, + } ] @@ -202,7 +202,7 @@ async function main () { { role: 'user', content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' - }, + } ] const toolsLastQuery1 = [ systemMessageAmbiguous, @@ -210,7 +210,7 @@ async function main () { role: 'user', content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' }, - ...tools1, + ...tools1 ] const tools2 = [ @@ -245,7 +245,7 @@ async function main () { }, required: ['lat1', 'lon1', 'lat2', 'lon2'] } - }, + } ] const toolsFirstQuery2 = [ @@ -254,7 +254,7 @@ async function main () { { role: 'user', content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' - }, + } ] const toolsLastQuery2 = [ @@ -263,7 +263,7 @@ async function main () { role: 'user', content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' }, - ...tools2, + ...tools2 ] const tools3 = [ @@ -298,7 +298,7 @@ async function main () { }, required: ['title', 'date'] } - }, + } ] const toolsFirstQuery3 = [ @@ -318,7 +318,7 @@ async function main () { { role: 'user', content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' - }, + } ] const toolsLastQuery3 = [ @@ -338,7 +338,7 @@ async function main () { role: 'user', content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' }, - ...tools3, + ...tools3 ] // 5. Running tool calling queries @@ -348,7 +348,7 @@ async function main () { { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, - { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 }, + { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } ] const toolCallResults = [] From 4aded94c966cff7d55e23dcc22bf66bc492f1e27 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Wed, 11 Feb 2026 13:18:35 +0300 Subject: [PATCH 03/11] (experiment) tool call: LFM model --- .../examples/toolCalling.js | 62 +++++++++++++++---- 1 file changed, 49 insertions(+), 13 deletions(-) diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js index b124fcea23..fe9b4e8c8e 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js @@ -10,7 +10,7 @@ function createSeparator (char = '=', length = 80) { return char.repeat(length) } -function extractToolCalls (response) { +function extractToolCallsQwen (response) { const toolCalls = [] const toolCallRegex = /([\s\S]*?)<\/tool_call>/g let match @@ -26,7 +26,24 @@ function extractToolCalls (response) { return toolCalls } -async function runQuery (model, query) { +const extractToolCallsLFM = (response) => { + const toolCallParts = response.split(']') + if (toolCallParts.length < 2) { + return [] + } + const toolCallsStr = toolCallParts.slice(0, -1).join(']').concat(']') + try { + const toolCalls = JSON.parse(toolCallsStr) + return toolCalls + } catch (e) { + console.error('ERROR: extractToolCallsLFM: unable to extract toolCalls\n') + console.error(toolCallsStr) + console.error(e) + } + return [] +} + +async function runQuery (model, query, extractToolCalls) { console.log(`\n${createSeparator()}`) console.log(query.name) console.log(createSeparator()) @@ -70,6 +87,19 @@ function printToolCallSummary (results) { console.log(`\n${createSeparator()}`) } +const modelMap = { + 'LFM': { + hdKey: 'f41503e44a2c0a537d9a9665984cb2d87eb2216e6301e898ffea60f5ce6c904d', + modelName: 'LFM2.5-1.2B-Instruct-Q4_K_M.gguf', + extractToolCalls: extractToolCallsLFM, + }, + 'Qwen3': { + hdKey: '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9', + modelName: 'Qwen3-1.7B-Q4_0.gguf', + extractToolCalls: extractToolCallsQwen, + } +} + async function main () { console.log('Tool Calling Example: Demonstrates tool calling capabilities') console.log('============================================================') @@ -78,7 +108,10 @@ async function main () { const store = new Corestore('./store') const hdStore = store.namespace('hd') - const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9' + // CHANGE ME + const { hdKey, modelName, extractToolCalls } = modelMap['LFM'] + // const { hdKey, modelName, extractToolCalls } = modelMap['Qwen3'] + const hdDL = new HyperDriveDL({ key: `hd://${hdKey}`, store: hdStore @@ -89,7 +122,7 @@ async function main () { loader: hdDL, opts: { stats: true }, logger: console, - modelName: 'Qwen3-1.7B-Q4_0.gguf', + modelName, diskPath: './models' } @@ -122,7 +155,8 @@ async function main () { // 4. Defining tool queries with function schemas const systemMessageAmbiguous = { role: 'system', - content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' + content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls. Output function calls as JSON.' + // content: 'Output function calls as JSON. You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' } const tools1 = [ @@ -197,8 +231,10 @@ async function main () { ] const toolsFirstQuery1 = [ - systemMessageAmbiguous, - ...tools1, + { + ...systemMessageAmbiguous, + content: systemMessageAmbiguous.content.concat(`List of tools: ${JSON.stringify(tools1)}`) + }, { role: 'user', content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' @@ -344,16 +380,16 @@ async function main () { // 5. Running tool calling queries const queries = [ { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 }, - { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, - { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, - { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, - { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, - { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } + // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, + // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, + // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, + // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, + // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } ] const toolCallResults = [] for (const query of queries) { - const result = await runQuery(model, query) + const result = await runQuery(model, query, extractToolCalls) toolCallResults.push(result) } From 006a1393ecf37018e2be42bac1c2f1a9b56015d6 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Wed, 11 Feb 2026 15:02:24 +0300 Subject: [PATCH 04/11] Revert "(experiment) tool call: LFM model" This reverts commit 4aded94c966cff7d55e23dcc22bf66bc492f1e27. --- .../examples/toolCalling.js | 62 ++++--------------- 1 file changed, 13 insertions(+), 49 deletions(-) diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js index fe9b4e8c8e..b124fcea23 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js @@ -10,7 +10,7 @@ function createSeparator (char = '=', length = 80) { return char.repeat(length) } -function extractToolCallsQwen (response) { +function extractToolCalls (response) { const toolCalls = [] const toolCallRegex = /([\s\S]*?)<\/tool_call>/g let match @@ -26,24 +26,7 @@ function extractToolCallsQwen (response) { return toolCalls } -const extractToolCallsLFM = (response) => { - const toolCallParts = response.split(']') - if (toolCallParts.length < 2) { - return [] - } - const toolCallsStr = toolCallParts.slice(0, -1).join(']').concat(']') - try { - const toolCalls = JSON.parse(toolCallsStr) - return toolCalls - } catch (e) { - console.error('ERROR: extractToolCallsLFM: unable to extract toolCalls\n') - console.error(toolCallsStr) - console.error(e) - } - return [] -} - -async function runQuery (model, query, extractToolCalls) { +async function runQuery (model, query) { console.log(`\n${createSeparator()}`) console.log(query.name) console.log(createSeparator()) @@ -87,19 +70,6 @@ function printToolCallSummary (results) { console.log(`\n${createSeparator()}`) } -const modelMap = { - 'LFM': { - hdKey: 'f41503e44a2c0a537d9a9665984cb2d87eb2216e6301e898ffea60f5ce6c904d', - modelName: 'LFM2.5-1.2B-Instruct-Q4_K_M.gguf', - extractToolCalls: extractToolCallsLFM, - }, - 'Qwen3': { - hdKey: '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9', - modelName: 'Qwen3-1.7B-Q4_0.gguf', - extractToolCalls: extractToolCallsQwen, - } -} - async function main () { console.log('Tool Calling Example: Demonstrates tool calling capabilities') console.log('============================================================') @@ -108,10 +78,7 @@ async function main () { const store = new Corestore('./store') const hdStore = store.namespace('hd') - // CHANGE ME - const { hdKey, modelName, extractToolCalls } = modelMap['LFM'] - // const { hdKey, modelName, extractToolCalls } = modelMap['Qwen3'] - + const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9' const hdDL = new HyperDriveDL({ key: `hd://${hdKey}`, store: hdStore @@ -122,7 +89,7 @@ async function main () { loader: hdDL, opts: { stats: true }, logger: console, - modelName, + modelName: 'Qwen3-1.7B-Q4_0.gguf', diskPath: './models' } @@ -155,8 +122,7 @@ async function main () { // 4. Defining tool queries with function schemas const systemMessageAmbiguous = { role: 'system', - content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls. Output function calls as JSON.' - // content: 'Output function calls as JSON. You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' + content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' } const tools1 = [ @@ -231,10 +197,8 @@ async function main () { ] const toolsFirstQuery1 = [ - { - ...systemMessageAmbiguous, - content: systemMessageAmbiguous.content.concat(`List of tools: ${JSON.stringify(tools1)}`) - }, + systemMessageAmbiguous, + ...tools1, { role: 'user', content: 'Search laptops under $1000 and add 2 with ID "laptop-123" to cart. Also, query users table age > 25 limit 50 with metadata.' @@ -380,16 +344,16 @@ async function main () { // 5. Running tool calling queries const queries = [ { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 }, - // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, - // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, - // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, - // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, - // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } + { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, + { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, + { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, + { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, + { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } ] const toolCallResults = [] for (const query of queries) { - const result = await runQuery(model, query, extractToolCalls) + const result = await runQuery(model, query) toolCallResults.push(result) } From 53a87745bace0e54c071e28c1ab289ce7f2f9b5a Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Thu, 12 Feb 2026 12:20:31 +0300 Subject: [PATCH 05/11] (draft) logger experiments --- .../examples/toolCalling.js | 20 +++++++++++++++++++ .../qvac-lib-infer-llamacpp-llm/package.json | 1 + .../qvac-lib-infer-llamacpp-llm/sample.json | 14 +++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 packages/qvac-lib-infer-llamacpp-llm/sample.json diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js index b124fcea23..cd80da3e36 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/toolCalling.js @@ -3,6 +3,7 @@ const Corestore = require('corestore') const HyperDriveDL = require('@qvac/dl-hyperdrive') const LlmLlamacpp = require('../index') +const { setLogger, releaseLogger } = require('../addonLogging') const process = require('bare-process') // Helper functions @@ -74,6 +75,24 @@ async function main () { console.log('Tool Calling Example: Demonstrates tool calling capabilities') console.log('============================================================') + // IMPORTANT: Set up the logger FIRST, before creating any addon instances + console.log('Setting up C++ logger...') + + setLogger((priority, message) => { + const priorityNames = { + 0: 'ERROR', + 1: 'WARNING', + 2: 'INFO', + 3: 'DEBUG', + 4: 'OFF' + } + + const priorityName = priorityNames[priority] || 'UNKNOWN' + const timestamp = new Date().toISOString() + + console.log(`[${timestamp}] [C++ log] [${priorityName}]: ${message}`) + }) + // 1. Initializing data loader const store = new Corestore('./store') const hdStore = store.namespace('hd') @@ -368,6 +387,7 @@ async function main () { await store.close() await hdDL.close() await model.unload() + releaseLogger() } } diff --git a/packages/qvac-lib-infer-llamacpp-llm/package.json b/packages/qvac-lib-infer-llamacpp-llm/package.json index 5a2e947735..1858e6c3dd 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/package.json +++ b/packages/qvac-lib-infer-llamacpp-llm/package.json @@ -70,6 +70,7 @@ }, "dependencies": { "@qvac/infer-base": "^0.2.2", + "@qvac/llm-llamacpp": "^0.8.9", "bare-path": "^3.0.0", "bare-process": "^4.2.2" }, diff --git a/packages/qvac-lib-infer-llamacpp-llm/sample.json b/packages/qvac-lib-infer-llamacpp-llm/sample.json new file mode 100644 index 0000000000..cbfdd20c8a --- /dev/null +++ b/packages/qvac-lib-infer-llamacpp-llm/sample.json @@ -0,0 +1,14 @@ +[ + { + "name": "searchProducts", + "arguments": {"query": "laptops under $1000", "category": "electronics", "maxPrice": 1000} + }, + { + "name": "addToCart", + "arguments": {"items": [{"productId": "laptop-123", "quantity": 2}} + }, + { + "name": "queryDB", + "arguments": {"table": "users", "conditions": {"field": "age", "operator": "greaterThan", "value": 25}, "limit": 50, "includeMetadata": true} + } +] From 0f1ba887e1174f6cad841e47d754e6ba3f01036c Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Fri, 27 Feb 2026 01:19:12 +0300 Subject: [PATCH 06/11] (internal) dynamic tools: test cases with kvCache --- .../examples/llamacpp-dynamic-tools.ts | 219 ++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts new file mode 100644 index 0000000000..98d1e4b4d1 --- /dev/null +++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts @@ -0,0 +1,219 @@ +import { z } from "zod"; +import { + completion, + loadModel, + unloadModel, + type ToolCall, + type CompletionStats, + QWEN_3_1_7B_INST_Q4, +} from "@/index"; + +// Define Zod schemas for tool parameters +const weatherSchema = z.object({ + city: z.string().describe("City name"), + country: z.string().describe("Country code").optional(), +}); + +const horoscopeSchema = z.object({ + sign: z.string().describe("An astrological sign like Taurus or Aquarius"), +}); + +// Map tool names to their schemas for runtime validation +const toolSchemas = { + get_weather: weatherSchema, + get_horoscope: horoscopeSchema, +}; + +// Simple tool definitions - just name, description, and Zod schema! +const tools1 = [ + { + name: "get_weather", + description: "Get current weather for a city", + parameters: weatherSchema, + }, +]; + +const tools2 = [ + { + name: "get_horoscope", + description: "Get today's horoscope for an astrological sign", + parameters: horoscopeSchema, + }, +]; + +const toolsAll = [ + { + name: "get_weather", + description: "Get current weather for a city", + parameters: weatherSchema, + }, + { + name: "get_horoscope", + description: "Get today's horoscope for an astrological sign", + parameters: horoscopeSchema, + }, +]; + +async function chatSession ({ modelId, history, tools, kvCache }) { + const result = completion({ modelId, history, kvCache, stream: true, tools }); + + // Consume token stream + const tokensTask = (async () => { + for await (const token of result.tokenStream) { + process.stdout.write(token); + } + })(); + + // Consume tool call events + const toolsTask = (async () => { + for await (const evt of result.toolCallStream) { + if (evt.type === "toolCall") { + console.log( + `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`, + ); + console.log(` ID: ${evt.call.id}`); + } else if (evt.type === "toolCallError") { + console.warn(`\nāš ļø Tool Error: ${evt.error.message}`); + console.warn(` Code: ${evt.error.code}`); + } + } + })(); + + await Promise.all([tokensTask, toolsTask]); + + const stats: CompletionStats | undefined = await result.stats; + const toolCalls: ToolCall[] = await result.toolCalls; + + console.log("\n\nšŸ“‹ Parsed Tool Calls:"); + if (toolCalls.length > 0) { + for (const call of toolCalls) { + console.log(` - ${call.name}(${JSON.stringify(call.arguments)})`); + + const schema = toolSchemas[call.name as keyof typeof toolSchemas]; + if (schema) { + const validated = schema.safeParse(call.arguments); + if (validated.success) { + console.log(` āœ“ Arguments validated with Zod`); + } else { + console.log(` āœ— Validation failed:`, validated.error); + } + } + } + } else { + console.log(" No tool calls detected in response"); + } + + console.log("\nšŸ“Š Performance Stats:", stats); + + // Execute tool calls and send results back to the model + if (toolCalls.length > 0) { + console.log("\n\nšŸ”§ Simulating Tool Execution..."); + + // Simulate tool execution (in a real app, you'd call actual APIs) + const toolResults = toolCalls.map((call) => { + let result = ""; + if (call.name === "get_weather") { + const args = call.arguments as { city: string; country?: string }; + result = `The weather in ${args.city} is sunny, 22°C with light clouds.`; + } else if (call.name === "get_horoscope") { + const args = call.arguments as { sign: string }; + result = `Horoscope for ${args.sign}: Today is a great day for new beginnings and creative endeavors!`; + } + console.log(` āœ“ ${call.name}: ${result}`); + return { toolCallId: call.id, result }; + }); + + // Add tool results to conversation history + history.push({ + role: "assistant", + content: await result.text, + }); + + // Add tool results as tool messages + for (const toolResult of toolResults) { + history.push({ + role: "tool", + content: toolResult.result, + }); + } + } + + // Send follow-up question with tool results + console.log("\n\nšŸ¤– Follow-up Response with Tool Results:"); + const followUpResult = completion({ + modelId, + history, + stream: true, + kvCache, + tools, + }); + + history.push({ + role: "assistant", + content: await followUpResult.text, + }); + + for await (const token of followUpResult.tokenStream) { + process.stdout.write(token); + } + + + const followUpStats = await followUpResult.stats; + console.log("\n\nšŸ“Š Follow-up Stats:", followUpStats); +} + +async function runTest({ kvCache, toolVariants }) { + console.log('run cache id=', kvCache) + try { + // Load model from provided file path with tools support enabled + const modelId = await loadModel({ + modelSrc: QWEN_3_1_7B_INST_Q4, + modelType: "llm", + modelConfig: { + ctx_size: 4096, + tools: true, // Enable tools support + }, + onProgress: (progress) => + console.log(`Loading: ${progress.percentage.toFixed(1)}%`), + }); + console.log(`āœ… Model loaded successfully! Model ID: ${modelId}`); + + // Create conversation history + const history = [ + { + role: "system", + content: + "You are a helpful assistant that can use tools.", + // "You are a helpful assistant that can use tools to get the weather and horoscope.", + }, + { + role: "user", + content: "What's the weather in Tokyo?", + }, + ]; + + + + console.log("\nšŸ¤– AI Response:"); + console.log("(Streaming with tool definitions in prompt)\n"); + + await chatSession({ modelId, history, tools: toolVariants[0], kvCache }) + + history.push({ + role: "user", + content: "if the weather in Tokyo is good, could you check my horoscope for Aquarius? ", + }) + + await chatSession({ modelId, history, tools: toolVariants[1], kvCache }) + + + console.log("\n\nšŸŽ‰ Completed!"); + await unloadModel({ modelId, clearStorage: false }); + } catch (error) { + console.error("āŒ Error:", error); + process.exit(1); + } +} +// using same kvCache for a single session +await runTest({ kvCache: `id-${Date.now()}`, toolVariants: [toolsAll, toolsAll] }) +await runTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2] }) From a9c66c13c8f97a1fca7324da369dfde3e41b4c97 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Fri, 27 Feb 2026 01:47:12 +0300 Subject: [PATCH 07/11] (internal) dynamic tools cache: enable log, skip checks --- packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts | 2 ++ .../server/bare/addons/llamacpp-completion/cache-logger.ts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts index 98d1e4b4d1..00a74e35bc 100644 --- a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts +++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts @@ -1,3 +1,5 @@ +/* eslint-disable */ +// @ts-nocheck import { z } from "zod"; import { completion, diff --git a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts index 1790cd878f..b06d25cc84 100644 --- a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts +++ b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/cache-logger.ts @@ -25,7 +25,7 @@ function formatMessages(messages: ChatMessage[]): string { export function logCacheStatus(cacheKey: string, isReusing: boolean): void { const status = isReusing ? "REUSING" : "CREATING"; - logger.debug(`[kv-cache] [${cacheKey}] ${status} cache`); + logger.info(`[kv-cache] [${cacheKey}] ${status} cache`); } export function logCacheInit( From 5a9afc3a9dcd0791e44d71bffa13f114d161dec7 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Fri, 27 Feb 2026 23:51:12 +0300 Subject: [PATCH 08/11] (improvement) llamacpp-llm addon: qwen template dynamic tools --- .../addon/src/utils/QwenTemplate.cpp | 26 +- .../examples/dynamicToolCalling.js | 426 ++++++++++++++++++ .../examples/llamacpp-dynamic-tools.ts | 2 +- 3 files changed, 438 insertions(+), 16 deletions(-) create mode 100644 packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp index 70bb99a753..94d9ea4290 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/QwenTemplate.cpp @@ -4,21 +4,8 @@ namespace qvac_lib_inference_addon_llama { namespace utils { const char* getFixedQwen3Template() { - return R"({%- if tools %} - {{- '<|im_start|>system\n' }} - {%- if messages[0].role == 'system' %} - {{- messages[0].content + '\n\n' }} - {%- endif %} - {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} - {%- for tool in tools %} - {{- "\n" }} - {{- tool | tojson }} - {%- endfor %} - {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} -{%- else %} - {%- if messages[0].role == 'system' %} - {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} - {%- endif %} + return R"({%- if messages[0].role == 'system' %} + {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }} {%- endif %} {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %} {%- for message in messages[::-1] %} @@ -81,6 +68,15 @@ const char* getFixedQwen3Template() { {%- endif %} {%- endif %} {%- endfor %} +{%- if tools %} + {{- '<|im_start|>system\n' }} + {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- endif %} {%- if add_generation_prompt %} {{- '<|im_start|>assistant\n' }} {%- if enable_thinking is defined and enable_thinking is false %} diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js new file mode 100644 index 0000000000..a66f524d22 --- /dev/null +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js @@ -0,0 +1,426 @@ +'use strict' + +const Corestore = require('corestore') +const HyperDriveDL = require('@qvac/dl-hyperdrive') +const LlmLlamacpp = require('../index') +const { setLogger, releaseLogger } = require('../addonLogging') +const process = require('bare-process') + +// Helper functions +function createSeparator (char = '=', length = 80) { + return char.repeat(length) +} + +function extractToolCalls (response) { + const toolCalls = [] + const toolCallRegex = /([\s\S]*?)<\/tool_call>/g + let match + while ((match = toolCallRegex.exec(response)) !== null) { + try { + const toolCallJson = match[1].trim() + const toolCall = JSON.parse(toolCallJson) + toolCalls.push(toolCall) + } catch (e) { + // Skip invalid JSON + } + } + return toolCalls +} + +async function runQuery (model, query) { + console.log(`\n${createSeparator()}`) + console.log(query.name) + console.log(createSeparator()) + console.log('\nThinking and Response:') + console.log(createSeparator('-')) + + const response = await model.run(query.prompt) + let fullResponse = '' + + await response + .onUpdate(data => { + process.stdout.write(data) + fullResponse += data + }) + .await() + + console.log('\n') + console.log(createSeparator('-')) + console.log('\nFull Response:') + console.log(fullResponse) + console.log(`\nInference Stats: ${JSON.stringify(response.stats, null, 2)}`) + console.log('\n') + + return { name: query.name, toolCalls: extractToolCalls(fullResponse) } +} + +function printToolCallSummary (results) { + console.log(`\n${createSeparator()}`) + console.log('Tool Call Summary') + console.log(createSeparator()) + for (const result of results) { + console.log(`\n${result.name}:`) + if (result.toolCalls.length === 0) { + console.log(' No tool calls found') + } else { + for (const toolCall of result.toolCalls) { + console.log(` ${toolCall.name} ${JSON.stringify(toolCall.arguments)}`) + } + } + } + console.log(`\n${createSeparator()}`) +} + +async function main () { + console.log('Tool Calling Example: Demonstrates tool calling capabilities') + console.log('============================================================') + + // IMPORTANT: Set up the logger FIRST, before creating any addon instances + console.log('Setting up C++ logger...') + + setLogger((priority, message) => { + const priorityNames = { + 0: 'ERROR', + 1: 'WARNING', + 2: 'INFO', + 3: 'DEBUG', + 4: 'OFF' + } + + const priorityName = priorityNames[priority] || 'UNKNOWN' + const timestamp = new Date().toISOString() + + console.log(`[${timestamp}] [C++ log] [${priorityName}]: ${message}`) + }) + + // 1. Initializing data loader + const store = new Corestore('./store') + const hdStore = store.namespace('hd') + + const hdKey = '05d3d7ad9cd650f53c28f85e312ef09a645dd487845897958b3be8a19cb3aab9' + const hdDL = new HyperDriveDL({ + key: `hd://${hdKey}`, + store: hdStore + }) + + // 2. Configuring model settings + const args = { + loader: hdDL, + opts: { stats: true }, + logger: console, + modelName: 'Qwen3-1.7B-Q4_0.gguf', + diskPath: './models' + } + + const config = { + device: 'gpu', + gpu_layers: '999', + ctx_size: '2048', + tools: 'true' + } + + // 3. Loading model + await hdDL.ready() + const model = new LlmLlamacpp(args, config) + const closeLoader = true + let totalProgress = 0 + const reportProgressCallback = (report) => { + if (typeof report === 'object' && Number(report.overallProgress) > totalProgress) { + process.stdout.write( + `\r${report.overallProgress}%: ${report.action} [${report.filesProcessed}/${report.totalFiles}] ${report.currentFileProgress}% ${report.currentFile}` + ) + if (Number(report.currentFileProgress) === 100) { + process.stdout.write('\n') + } + totalProgress = Number(report.overallProgress) + } + } + await model.load(closeLoader, reportProgressCallback) + + try { + // 4. Defining tool queries with function schemas + const systemMessageAmbiguous = { + role: 'system', + content: 'You are a helpful assistant with access to various tools. If request is ambiguous,skip tool calls.' + } + + const tools1 = [ + // Test handled by this function: + // - Multiple parameters with different types + // - Complex multiple tools with array parameters + { + type: 'function', + name: 'searchProducts', + description: 'Search products', + parameters: { + type: 'object', + properties: { + query: { type: 'string', description: 'Query' }, + category: { type: 'string', enum: ['electronics', 'clothing', 'books'], description: 'Category' }, + maxPrice: { type: 'number', minimum: 0, description: 'Max price' } + }, + required: ['query'] + }, + handler: async ({ query, category, maxPrice }) => { + if (!query) throw new Error('searchProducts: invalid empty "query" field') + if (category !== 'electronics' || maxPrice !== 1000) { + return { data: '' } + } + return { + data: [ + { productId: 'laptop-1', price: '$300' }, + { productId: 'laptop-2', price: '$550' }, + { productId: 'laptop-3', price: '$800' }, + ] + } + }, + }, + // Test handled by this function: + // - Part of Complex multiple tools with array parameters test + { + type: 'function', + name: 'addToCart', + description: 'Add items to cart', + parameters: { + type: 'object', + properties: { + items: { + type: 'array', + items: { + type: 'object', + properties: { + productId: { type: 'string', description: 'Product ID' }, + quantity: { type: 'integer', minimum: 1, description: 'Quantity' } + }, + required: ['productId', 'quantity'] + } + } + }, + required: ['items'] + } + }, + // Test handled by this function: + // - Tool with boolean and optional parameters + // - Part of Complex multiple tools with nested object parameters test + { + type: 'function', + name: 'queryDB', + description: 'Query database', + parameters: { + type: 'object', + properties: { + table: { type: 'string', description: 'Table' }, + conditions: { + type: 'object', + properties: { + field: { type: 'string', description: 'Field' }, + operator: { type: 'string', enum: ['equals', 'greaterThan'], description: 'Operator' }, + value: { type: 'string', description: 'Value' } + }, + required: ['field', 'operator', 'value'] + }, + limit: { type: 'integer', minimum: 1, default: 10, description: 'Limit' }, + includeMetadata: { type: 'boolean', default: false, description: 'Include metadata' } + }, + required: ['table', 'conditions'] + } + } + + ] + + const laptopPrompt = 'Search laptops under $1000 and add 2 items above $500 to a cart. Also, query users table age > 25 limit 50 with metadata.' + + const toolsFirstQuery1 = [ + systemMessageAmbiguous, + ...tools1, + { + role: 'user', + content: laptopPrompt, + } + ] + // { toolCallId: call.id, result } + const toolsFirstQuery1_1 = [ + ...toolsFirstQuery1, + { + role: 'user', + content: 'Thank you!', + } + ] + const toolsLastQuery1 = [ + systemMessageAmbiguous, + { + role: 'user', + content: laptopPrompt, + }, + ...tools1 + ] + + const tools2 = [ + // Test handled by this function: + // - Math/computation tool + { + type: 'function', + name: 'calculate', + description: 'Calculate math', + parameters: { + type: 'object', + properties: { + expression: { type: 'string', description: 'Expression' }, + precision: { type: 'integer', minimum: 0, maximum: 10, default: 2, description: 'Precision' } + }, + required: ['expression'] + } + }, + // Test handled by this function: + // - Invalid/ambiguous query + { + type: 'function', + name: 'calculateDistance', + description: 'Calculate distance between two coordinates', + parameters: { + type: 'object', + properties: { + lat1: { type: 'number', description: 'Latitude of point 1' }, + lon1: { type: 'number', description: 'Longitude of point 1' }, + lat2: { type: 'number', description: 'Latitude of point 2' }, + lon2: { type: 'number', description: 'Longitude of point 2' } + }, + required: ['lat1', 'lon1', 'lat2', 'lon2'] + } + } + ] + + const toolsFirstQuery2 = [ + systemMessageAmbiguous, + ...tools2, + { + role: 'user', + content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' + } + ] + + const toolsLastQuery2 = [ + systemMessageAmbiguous, + { + role: 'user', + content: 'calculate 156 * 23 precision 0. Also, How far is here from there?' + }, + ...tools2 + ] + + const tools3 = [ + // Test handled by this function: + // - Part of conversation context tool test + { + type: 'function', + name: 'getWeather', + description: 'Get weather forecast for a city', + parameters: { + type: 'object', + properties: { + city: { type: 'string', description: 'City name' }, + date: { type: 'string', description: 'Date in YYYY-MM-DD' } + }, + required: ['city', 'date'] + } + }, + // Test handled by this function: + // - Part of conversation context tool test + { + type: 'function', + name: 'createCalendarEvent', + description: 'Create a calendar event', + parameters: { + type: 'object', + properties: { + title: { type: 'string', description: 'Event title' }, + date: { type: 'string', description: 'Event date (YYYY-MM-DD)' }, + time: { type: 'string', description: 'Start time (HH:MM)' }, + duration: { type: 'integer', description: 'Duration in minutes' } + }, + required: ['title', 'date'] + } + } + ] + + const toolsFirstQuery3 = [ + { + role: 'system', + content: 'You are a personal assistant.' + }, + ...tools3, + { + role: 'user', + content: 'What is the weather in Seattle on April 10th?' + }, + { + role: 'assistant', + content: 'Let me check that for you. Do you need hourly or just daily?' + }, + { + role: 'user', + content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' + } + ] + + const toolsLastQuery3 = [ + { + role: 'system', + content: 'You are a personal assistant.' + }, + { + role: 'user', + content: 'What is the weather in Seattle on April 10th?' + }, + { + role: 'assistant', + content: 'Let me check that for you. Do you need hourly or just daily?' + }, + { + role: 'user', + content: 'Daily is fine. Also, schedule a team meeting on April 10th at 2 PM for 60 minutes.' + }, + ...tools3 + ] + + // 5. Running tool calling queries + const queries = [ + { name: 'Query 1 (tools first): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1 }, + { name: 'Query 1 (tools first-thanks): Complex tool calling with multiple parameters', prompt: toolsFirstQuery1_1 }, + // { name: 'Query 1 (tools last): Complex tool calling with multiple parameters', prompt: toolsLastQuery1 }, + // { name: 'Query 2 (tools first): Math calculation and ambiguous query', prompt: toolsFirstQuery2 }, + // { name: 'Query 2 (tools last): Math calculation and ambiguous query', prompt: toolsLastQuery2 }, + // { name: 'Query 3 (tools first): Conversation context with tools', prompt: toolsFirstQuery3 }, + // { name: 'Query 3 (tools last): Conversation context with tools', prompt: toolsLastQuery3 } + ] + + const toolCallResults = [] + for (const query of queries) { + const result = await runQuery(model, query) + toolCallResults.push(result) + } + + // Print all tool calls together at the end + printToolCallSummary(toolCallResults) + } catch (error) { + const errorMessage = error?.message || error?.toString() || String(error) + console.error('Error occurred:', errorMessage) + console.error('Error details:', error) + } finally { + // 6. Cleaning up resources + await store.close() + await hdDL.close() + await model.unload() + releaseLogger() + } +} + +main().catch(error => { + console.error('Fatal error in main function:', { + error: error.message, + stack: error.stack, + timestamp: new Date().toISOString() + }) + process.exit(1) +}) + diff --git a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts index 00a74e35bc..9cd6bb2526 100644 --- a/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts +++ b/packages/qvac-sdk/examples/llamacpp-dynamic-tools.ts @@ -169,7 +169,7 @@ async function runTest({ kvCache, toolVariants }) { try { // Load model from provided file path with tools support enabled const modelId = await loadModel({ - modelSrc: QWEN_3_1_7B_INST_Q4, + modelSrc: 'dynamic-tools_Qwen3-1.7B-Q4_0.gguf',// QWEN_3_1_7B_INST_Q4, modelType: "llm", modelConfig: { ctx_size: 4096, From b17011de1fff0af9d51769712d1b9d504c98c518 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Mon, 2 Mar 2026 16:32:44 +0300 Subject: [PATCH 09/11] (draft) sdk server: completion dynamic tools option --- .../handlers/completion-stream.ts | 25 ++++++++++++++++--- .../server/bare/registry/model-registry.ts | 1 + .../qvac-sdk/server/utils/tool-integration.ts | 7 ++++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts index 793d33bc44..ef69e511e6 100644 --- a/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts +++ b/packages/qvac-sdk/server/bare/addons/llamacpp-completion/handlers/completion-stream.ts @@ -29,6 +29,7 @@ import { type AnyModel, } from "@/server/bare/registry/model-registry"; import { + appendToolsToHistory, checkForToolEvents, insertToolsIntoHistory, setupToolGrammar, @@ -160,13 +161,17 @@ function prepareMessagesForCache( content: string; attachments?: { path: string }[] | undefined; }[], + tools?: Tool[] ): ChatHistory[] { + const transformedTools = tools ? transformMessages(tools) : []; if (cacheExists && history.length > 0) { const lastMessage = history[history.length - 1]; const lastTransformedMessages = transformMessage(lastMessage!); + return [ { role: "session", content: cachePathToUse }, ...lastTransformedMessages, + ...transformedTools, ]; } @@ -177,13 +182,14 @@ function prepareMessagesForCache( return [ { role: "session", content: cachePathToUse }, ...transformedHistoryWithoutSystem, + ...transformedTools, ]; } async function* processModelResponse( model: AnyModel, messagesToSend: ChatHistory[], - shouldSaveCache: boolean, + shouldSaveCache: boolean, // TODO: start here tools?: Tool[], ): AsyncGenerator< { token: string; toolCallEvent?: ToolCallEvent }, @@ -250,6 +256,10 @@ export async function* completion( const modelConfig = getModelConfig(modelId); const toolsEnabled = (modelConfig as { tools?: boolean }).tools === true; + // TODO: dynamicTools as a "Tool[]" param + const dynamicTools = toolsEnabled && ( + (modelConfig as { dynamicTools?: boolean }).dynamicTools === true + ); let historyWithTools: Array< | { @@ -261,7 +271,11 @@ export async function* completion( > = history; if (tools && tools.length > 0 && toolsEnabled) { - historyWithTools = insertToolsIntoHistory(history, tools); + if (dynamicTools) { + historyWithTools = appendToolsToHistory(history, tools) + } else { + historyWithTools = insertToolsIntoHistory(history, tools); + } setupToolGrammar(modelConfig as Record, tools); } @@ -291,7 +305,7 @@ export async function* completion( cachePathToUse, systemPromptToUse, kvCache, - tools && toolsEnabled ? tools : undefined, + (tools && toolsEnabled && !dynamicTools) ? tools : undefined, ); markCacheInitialized(modelId, configHash, kvCache); cacheExists = true; @@ -301,6 +315,7 @@ export async function* completion( cachePathToUse, cacheExists, history, + (tools && toolsEnabled && dynamicTools) ? tools : undefined, ); logMessagesToAddon(messagesToSend, "PROMPT_SEND"); @@ -338,7 +353,7 @@ export async function* completion( cachePathToUse, systemPromptToUse, "auto", - tools && toolsEnabled ? tools : undefined, + undefined, //tools && toolsEnabled ? tools : undefined, ); markCacheInitialized(modelId, configHash, currentCacheInfo.cacheKey); cacheExists = true; @@ -348,6 +363,8 @@ export async function* completion( cachePathToUse, cacheExists, history, + // (tools && toolsEnabled && dynamicTools) ? tools : undefined, + tools, ); logMessagesToAddon(messagesToSend, "PROMPT_SEND"); diff --git a/packages/qvac-sdk/server/bare/registry/model-registry.ts b/packages/qvac-sdk/server/bare/registry/model-registry.ts index 3e9c0d187b..21aa6ea3e2 100644 --- a/packages/qvac-sdk/server/bare/registry/model-registry.ts +++ b/packages/qvac-sdk/server/bare/registry/model-registry.ts @@ -81,6 +81,7 @@ export function registerModel( `Delegated model registered: ${id} -> topic: ${topic}, provider: ${providerPublicKey}, timeout: ${timeout}ms`, ); } else { + // TODO: modelRegistry.set - "config.tools"? modelRegistry.set(id, { id, isDelegated: false, diff --git a/packages/qvac-sdk/server/utils/tool-integration.ts b/packages/qvac-sdk/server/utils/tool-integration.ts index 905a6c44bc..96b2a633e3 100644 --- a/packages/qvac-sdk/server/utils/tool-integration.ts +++ b/packages/qvac-sdk/server/utils/tool-integration.ts @@ -27,6 +27,13 @@ export function insertToolsIntoHistory( return [...tools, ...history]; } +export function appendToolsToHistory( + history: HistoryMessage[], + tools: Tool[], +): Array { + return [...history, ...tools]; +} + export function setupToolGrammar( modelConfig: Record, tools: Tool[], From 933c96e6fcd94d482501504045d85e23e0e5ade6 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Tue, 3 Mar 2026 00:08:40 +0300 Subject: [PATCH 10/11] (internal) llamaccp-llm: tools tests imp with cache, verbose --- .../addon/src/utils/ChatTemplateUtils.cpp | 2 ++ .../examples/dynamicToolCalling.js | 13 +++++++++---- packages/qvac-lib-infer-llamacpp-llm/index.js | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp index 952b49ed53..72a75aa439 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/utils/ChatTemplateUtils.cpp @@ -68,6 +68,8 @@ std::string getChatTemplateForModel( // For Qwen3 models, use the fixed template if (isQwen3Model(model)) { + QLOG_IF( + Priority::ERROR, "[ChatTemplateUtils] Using CHANGED Qwen3 template\n"); return getFixedQwen3Template(); } diff --git a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js index a66f524d22..53f3485390 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js +++ b/packages/qvac-lib-infer-llamacpp-llm/examples/dynamicToolCalling.js @@ -109,14 +109,18 @@ async function main () { opts: { stats: true }, logger: console, modelName: 'Qwen3-1.7B-Q4_0.gguf', - diskPath: './models' + diskPath: './models', + modelConfig: { + kvCache: true, + } } const config = { device: 'gpu', gpu_layers: '999', - ctx_size: '2048', - tools: 'true' + ctx_size: '4096', + verbosity: '3', + tools: 'true', } // 3. Loading model @@ -228,7 +232,8 @@ async function main () { ] - const laptopPrompt = 'Search laptops under $1000 and add 2 items above $500 to a cart. Also, query users table age > 25 limit 50 with metadata.' + const laptopPrompt = 'Search laptops under $1000 and then, query users table age > 25 limit 50 with metadata. After if there are any users older than 25, add the cheapest found prev laptop to the cart' + // const laptopPrompt = 'Search laptops under $1000 and then - from those returned by the search, need to add 2 items, cheapest but in $500-$1000 range, to a cart. Also, query users table age > 25 limit 50 with metadata.' const toolsFirstQuery1 = [ systemMessageAmbiguous, diff --git a/packages/qvac-lib-infer-llamacpp-llm/index.js b/packages/qvac-lib-infer-llamacpp-llm/index.js index 278752e52f..5efd48fe4b 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/index.js +++ b/packages/qvac-lib-infer-llamacpp-llm/index.js @@ -105,7 +105,7 @@ class LlmLlamacpp extends BaseInference { * @returns {Addon} The instantiated addon interface */ _createAddon (configurationParams) { - this.logger.info( + this.logger.error( 'Creating Llama interface with configuration:', configurationParams ) From 2c2b518d85d217072553b7e17a2eb3f3c083a489 Mon Sep 17 00:00:00 2001 From: Mikhail Sotnikov Date: Tue, 3 Mar 2026 02:25:25 +0300 Subject: [PATCH 11/11] (internal) llamacpp-llm: cache dynamic tools impl --- .../src/model-interface/CacheManager.cpp | 40 + .../addon/src/model-interface/LlamaModel.cpp | 4 +- .../addon/src/model-interface/LlmContext.hpp | 8 + .../src/model-interface/MtmdLlmContext.cpp | 2 + .../src/model-interface/MtmdLlmContext.hpp | 7 + .../src/model-interface/TextLlmContext.cpp | 22 + .../src/model-interface/TextLlmContext.hpp | 18 + .../test/unit/CMakeLists.txt | 1 + .../test/unit/test_tool_token_cache.cpp | 761 ++++++++++++++++++ 9 files changed, 862 insertions(+), 1 deletion(-) create mode 100644 packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp index ceab3c95ed..33b8d86f8e 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/CacheManager.cpp @@ -177,6 +177,26 @@ bool CacheManager::loadCache() { "%s: attempting to load saved session from '%s'\n", __func__, sessionPath_.c_str())); + + // Remove tool tokens from KV cache before saving + llama_pos toolTokenCount = llmContext_->getLastToolTokenCount(); + if (toolTokenCount > 0) { + auto* mem = llama_get_memory(ctx); + llama_pos currentPast = llmContext_->getNPast(); + llama_pos newNPast = currentPast - toolTokenCount; + + if (newNPast > 0) { + llama_memory_seq_rm(mem, -1, newNPast, -1); + llmContext_->setNPast(newNPast); + + QLOG_IF( + Priority::DEBUG, + string_format( + "%s: removed %d tool tokens before saving cache\n", + __func__, + toolTokenCount)); + } + } if (!isFileInitialized(sessionPath_)) { QLOG_IF( Priority::DEBUG, @@ -244,6 +264,26 @@ void CacheManager::saveCache() { __func__, sessionPath_.c_str())); + // Remove tool tokens from KV cache before saving + llama_pos toolTokenCount = llmContext_->getLastToolTokenCount(); + if (toolTokenCount > 0) { + auto* mem = llama_get_memory(ctx); + llama_pos currentPast = llmContext_->getNPast(); + llama_pos newNPast = currentPast - toolTokenCount; + + if (newNPast > 0) { + llama_memory_seq_rm(mem, -1, newNPast, -1); + llmContext_->setNPast(newNPast); + + QLOG_IF( + Priority::DEBUG, + string_format( + "%s: removed %d tool tokens before saving cache\n", + __func__, + toolTokenCount)); + } + } + llama_token sessionTokens[2] = { static_cast(llmContext_->getNPast()), static_cast(llmContext_->getFirstMsgTokens())}; diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp index 278298c381..a455a90002 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlamaModel.cpp @@ -667,7 +667,9 @@ std::unique_ptr LlamaModel::CreateContext( return std::make_unique(params, std::move(llamaInit)); } isTextLlm = true; - return std::make_unique(params, std::move(llamaInit)); + auto ctx = std::make_unique(params, std::move(llamaInit)); + ctx->setCalculateToolTokenCount(params.use_jinja); + return ctx; } bool LlamaModel::LoadMedia(const LlamaModel::Input& input) { diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp index c997566aa0..75a2571582 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/LlmContext.hpp @@ -174,6 +174,14 @@ class LlmContext { // NOLINT(cppcoreguidelines-special-member-functions) */ virtual llama_pos removeLastNTokens(llama_pos count) = 0; + /** + * Get the number of tool tokens from the last user message. + * Used for cache management when tools are appended after user messages. + * + * @return - the number of tool tokens. + */ + [[nodiscard]] virtual llama_pos getLastToolTokenCount() const = 0; + /** * The reset media method. It resets the media. * diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp index 267531d2f4..582e436f5e 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.cpp @@ -421,6 +421,8 @@ void MtmdLlmContext::setNPast(llama_pos nPast) { this->n_past = nPast; } llama_pos MtmdLlmContext::getFirstMsgTokens() const { return firstMsgTokens; } +llama_pos MtmdLlmContext::getLastToolTokenCount() const { return 0; } + void MtmdLlmContext::setFirstMsgTokens(llama_pos firstMsgTokens) { this->firstMsgTokens = firstMsgTokens; } diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp index 4831373dee..322c289bfa 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/MtmdLlmContext.hpp @@ -102,6 +102,13 @@ class MtmdLlmContext: public LlmContext { */ void setFirstMsgTokens(llama_pos firstMsgTokens) override; + /** + * The get last tool token count method. It returns 0 for multimodal context. + * + * @return - 0 (not applicable for multimodal). + */ + [[nodiscard]] llama_pos getLastToolTokenCount() const override; + /** * The set n_discarded method. It sets the n_discarded. * diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp index db76071c51..18a273a08f 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.cpp @@ -228,6 +228,22 @@ void TextLlmContext::tokenizeChat( AddonID, toString(EmptyTokenizedInput), errorMsg); } + // Calculate tool token count by tokenizing without tools + lastToolTokenCount_ = 0; + if (calculateToolTokenCount_ && !tools.empty()) { + common_chat_templates_inputs inputsNoTools = inputs; + inputsNoTools.tools = {}; + std::string promptNoTools = getPrompt(tmpls.get(), inputsNoTools); + std::vector tokensNoTools = + common_tokenize(lctx, promptNoTools, addSpecial, true); + + if (!tokensNoTools.empty() && !inputTokens.empty() && + inputTokens.size() > tokensNoTools.size()) { + lastToolTokenCount_ = static_cast( + inputTokens.size() - tokensNoTools.size()); + } + } + // Encode the input if model has encoder if (llama_model_has_encoder(model) && n_past == 0 && !isCacheLoaded) { int encInputSize = static_cast(inputTokens.size()); @@ -508,6 +524,12 @@ void TextLlmContext::setNPast(llama_pos nPast) { this->n_past = nPast; } llama_pos TextLlmContext::getFirstMsgTokens() const { return firstMsgTokens; } +llama_pos TextLlmContext::getLastToolTokenCount() const { return lastToolTokenCount_; } + +void TextLlmContext::setCalculateToolTokenCount(bool enabled) { + calculateToolTokenCount_ = enabled; +} + void TextLlmContext::setFirstMsgTokens(llama_pos firstMsgTokens) { this->firstMsgTokens = firstMsgTokens; } diff --git a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp index d4c004c348..7a515f821c 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp +++ b/packages/qvac-lib-infer-llamacpp-llm/addon/src/model-interface/TextLlmContext.hpp @@ -94,6 +94,22 @@ class TextLlmContext: public LlmContext { * @param first_msg_tokens - the first msg tokens. */ void setFirstMsgTokens(llama_pos firstMsgTokens) override; + + /** + * The get last tool token count method. It returns the number of tool tokens + * from the last user message. + * + * @return - the number of tool tokens. + */ + [[nodiscard]] llama_pos getLastToolTokenCount() const override; + + /** + * The set calculate tool token count method. It enables/disables + * tool token count calculation for cache management. + * + * @param enabled - whether to calculate tool token count. + */ + void setCalculateToolTokenCount(bool enabled); /** * The set n_discarded method. It sets the n_discarded. * @@ -157,6 +173,8 @@ class TextLlmContext: public LlmContext { llama_pos n_past = 0; // NOLINT(readability-identifier-naming) llama_pos n_discarded = 0; // NOLINT(readability-identifier-naming) llama_pos firstMsgTokens = 0; // NOLINT(readability-identifier-naming) + llama_pos lastToolTokenCount_ = 0; // NOLINT(readability-identifier-naming) + bool calculateToolTokenCount_ = true; // NOLINT(readability-identifier-naming) ThreadPoolPtr threadpool; // NOLINT(readability-identifier-naming) ThreadPoolPtr threadpool_batch; // NOLINT(readability-identifier-naming) diff --git a/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt b/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt index 8226cf3537..ef4c4ae83e 100644 --- a/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt +++ b/packages/qvac-lib-infer-llamacpp-llm/test/unit/CMakeLists.txt @@ -13,6 +13,7 @@ add_executable( # Implemented tests test_llama_model.cpp test_cache_management.cpp + test_tool_token_cache.cpp test_text_llm_context.cpp # Backend selection tests test_backend_selection.cpp diff --git a/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp b/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp new file mode 100644 index 0000000000..07235baedc --- /dev/null +++ b/packages/qvac-lib-infer-llamacpp-llm/test/unit/test_tool_token_cache.cpp @@ -0,0 +1,761 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "model-interface/LlamaModel.hpp" +#include "test_common.hpp" + +namespace fs = std::filesystem; + +namespace { +double getStatValue( + const qvac_lib_inference_addon_cpp::RuntimeStats& stats, + const std::string& key) { + for (const auto& stat : stats) { + if (stat.first == key) { + return std::visit( + [](const auto& value) -> double { + if constexpr (std::is_same_v< + std::decay_t, + double>) { + return value; + } else { + return static_cast(value); + } + }, + stat.second); + } + } + return 0.0; +} +} // namespace + +class ToolTokenCacheTest : public ::testing::Test { +protected: + void SetUp() override { + config_files["device"] = test_common::getTestDevice(); + config_files["ctx_size"] = "2048"; + config_files["gpu_layers"] = test_common::getTestGpuLayers(); + config_files["n_predict"] = "10"; + + fs::path basePath; + if (fs::exists(fs::path{"../../../models/unit-test"})) { + basePath = fs::path{"../../../models/unit-test"}; + } else { + basePath = fs::path{"models/unit-test"}; + } + + fs::path modelPath = basePath / "Llama-3.2-1B-Instruct-Q4_0.gguf"; + if (fs::exists(modelPath)) { + test_model_path = modelPath.string(); + } else { + modelPath = basePath / "test_model.gguf"; + if (fs::exists(modelPath)) { + test_model_path = modelPath.string(); + } else { + test_model_path = "Llama-3.2-1B-Instruct-Q4_0.gguf"; + } + } + test_projection_path = ""; + + fs::path backendDir; +#ifdef TEST_BINARY_DIR + backendDir = fs::path(TEST_BINARY_DIR); +#else + backendDir = fs::current_path() / "build" / "test" / "unit"; +#endif + + config_files["backendsDir"] = backendDir.string(); + + session_with_tools_path = "test_session_with_tools.bin"; + session_after_tools_path = "test_session_after_tools.bin"; + } + + void TearDown() override { + for (const auto& session_file : + {session_with_tools_path, session_after_tools_path}) { + if (fs::exists(session_file)) { + fs::remove(session_file); + } + } + } + + bool hasValidModel() { return fs::exists(test_model_path); } + + std::unique_ptr createModel() { + if (!hasValidModel()) { + return nullptr; + } + auto model = std::make_unique( + test_model_path, test_projection_path, config_files); + model->waitForLoadInitialization(); + if (!model->isLoaded()) { + return nullptr; + } + return model; + } + + std::unordered_map config_files; + std::string test_model_path; + std::string test_projection_path; + std::string session_with_tools_path; + std::string session_after_tools_path; +}; + +TEST_F(ToolTokenCacheTest, CacheWithToolsBasic) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is the weather?"}, + {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output = model->process(inputWithTools); + auto stats = model->runtimeStats(); + EXPECT_GE(stats.size(), 0); + }); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + std::string saveOutput = model->process(saveInput); + EXPECT_EQ(saveOutput.length(), 0); + }); + + EXPECT_TRUE(fs::exists(session_with_tools_path)); +} + +TEST_F(ToolTokenCacheTest, CachePersistenceWithTools) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model1 = createModel(); + if (!model1) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is bitcoin? Answer briefly."}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model1->process(inputWithTools); + EXPECT_GE(output1.length(), 0); + }); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + std::string saveOutput = model1->process(saveInput); + EXPECT_EQ(saveOutput.length(), 0); + }); + + auto statsBefore = model1->runtimeStats(); + double cacheTokensBefore = getStatValue(statsBefore, "CacheTokens"); + EXPECT_GT(cacheTokensBefore, 0.0); + + model1.reset(); + + auto model2 = createModel(); + if (!model2) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputAfterCache = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What did I ask about?"} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model2->process(inputAfterCache); + EXPECT_GE(output2.length(), 0); + }); + + auto statsAfter = model2->runtimeStats(); + double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens"); + EXPECT_GT(cacheTokensAfter, 0.0); +} + +TEST_F(ToolTokenCacheTest, CacheWithToolsThenWithout) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is bitcoin?"}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(inputWithTools); + EXPECT_GE(output1.length(), 0); + }); + + std::string saveInput1 = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput1); + }); + + std::string inputAfterCache = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is ethereum?"} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model->process(inputAfterCache); + EXPECT_GE(output2.length(), 0); + }); + + auto stats = model->runtimeStats(); + double cacheTokens = getStatValue(stats, "CacheTokens"); + EXPECT_GT(cacheTokens, 0.0); +} + +TEST_F(ToolTokenCacheTest, MultipleToolCallsInCache) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string input1 = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Search laptops under $1000"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(input1); + EXPECT_GE(output1.length(), 0); + }); + + std::string saveInput1 = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput1); + }); + + std::string input2 = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Now search for phones"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model->process(input2); + EXPECT_GE(output2.length(), 0); + }); + + auto stats = model->runtimeStats(); + double cacheTokens = getStatValue(stats, "CacheTokens"); + EXPECT_GT(cacheTokens, 0.0); +} + +TEST_F(ToolTokenCacheTest, CacheResetClearsToolTokens) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is bitcoin?"}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(inputWithTools); + EXPECT_GE(output1.length(), 0); + }); + + std::string resetInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "reset"}])"; + EXPECT_NO_THROW({ + std::string resetOutput = model->process(resetInput); + EXPECT_EQ(resetOutput.length(), 0); + }); + + auto statsAfterReset = model->runtimeStats(); + double cacheTokensAfterReset = getStatValue(statsAfterReset, "CacheTokens"); + EXPECT_EQ(cacheTokensAfterReset, 0.0); +} + +TEST_F(ToolTokenCacheTest, SaveAfterToolInference) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is blockchain? Answer briefly."}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(inputWithTools); + EXPECT_GE(output1.length(), 0); + }); + + auto statsBeforeSave = model->runtimeStats(); + double promptTokens = getStatValue(statsBeforeSave, "promptTokens"); + double cacheTokens = getStatValue(statsBeforeSave, "CacheTokens"); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + std::string saveOutput = model->process(saveInput); + EXPECT_EQ(saveOutput.length(), 0); + }); + + EXPECT_TRUE(fs::exists(session_with_tools_path)); + + model.reset(); + + auto model2 = createModel(); + if (!model2) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string reloadInput = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Continue"} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model2->process(reloadInput); + EXPECT_GE(output2.length(), 0); + }); + + auto statsAfterReload = model2->runtimeStats(); + double cacheTokensAfterReload = getStatValue(statsAfterReload, "CacheTokens"); + EXPECT_GT(cacheTokensAfterReload, 0.0); +} + +TEST_F(ToolTokenCacheTest, MultipleToolsInSingleMessage) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithMultipleTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Search laptops and add to cart"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}, + {"type": "function", "name": "addToCart", "description": "Add items to cart", "parameters": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "string"}}}, "required": ["items"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output = model->process(inputWithMultipleTools); + EXPECT_GE(output.length(), 0); + auto stats = model->runtimeStats(); + EXPECT_GE(stats.size(), 0); + }); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput); + }); + + EXPECT_TRUE(fs::exists(session_with_tools_path)); +} + +TEST_F(ToolTokenCacheTest, CacheLoadAfterTools) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model1 = createModel(); + if (!model1) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string firstInput = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Hello"} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model1->process(firstInput); + EXPECT_GE(output1.length(), 0); + }); + + std::string saveInput1 = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model1->process(saveInput1); + }); + + model1.reset(); + + auto model2 = createModel(); + if (!model2) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "What is ethereum?"}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model2->process(inputWithTools); + EXPECT_GE(output2.length(), 0); + }); + + auto stats = model2->runtimeStats(); + double cacheTokens = getStatValue(stats, "CacheTokens"); + EXPECT_GT(cacheTokens, 0.0); +} + +TEST_F(ToolTokenCacheTest, ToolCacheEdgeCaseEmptyTools) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputNoTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Hello, how are you?"} + ])"; + + EXPECT_NO_THROW({ + std::string output = model->process(inputNoTools); + EXPECT_GE(output.length(), 0); + auto stats = model->runtimeStats(); + EXPECT_GE(stats.size(), 0); + }); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput); + }); + + EXPECT_TRUE(fs::exists(session_with_tools_path)); +} + +TEST_F(ToolTokenCacheTest, CacheWithUpdatedToolsSubsequentPrompt) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithFirstTool = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Search for laptops"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(inputWithFirstTool); + EXPECT_GE(output1.length(), 0); + }); + + auto stats1 = model->runtimeStats(); + double cacheTokens1 = getStatValue(stats1, "CacheTokens"); + double promptTokens1 = getStatValue(stats1, "promptTokens"); + EXPECT_GT(cacheTokens1, 0.0) << "Cache should have tokens after first prompt"; + EXPECT_GT(promptTokens1, 0.0) << "Should have prompt tokens"; + + std::string saveInput1 = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput1); + }); + + std::string inputWithUpdatedTool = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Now search for phones"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}, + {"type": "function", "name": "getPrice", "description": "Get price", "parameters": {"type": "object", "properties": {"item": {"type": "string"}}, "required": ["item"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model->process(inputWithUpdatedTool); + EXPECT_GE(output2.length(), 0); + }); + + auto stats2 = model->runtimeStats(); + double cacheTokens2 = getStatValue(stats2, "CacheTokens"); + double promptTokens2 = getStatValue(stats2, "promptTokens"); + EXPECT_GT(cacheTokens2, 0.0) << "Cache should have tokens after second prompt with updated tools"; + EXPECT_GT(promptTokens2, 0.0) << "Should have prompt tokens"; +} + +TEST_F(ToolTokenCacheTest, CachePersistsWithDifferentToolsAfterSave) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model1 = createModel(); + if (!model1) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputFirst = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Search for books"}, + {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + model1->process(inputFirst); + }); + + auto stats1 = model1->runtimeStats(); + double cacheTokens1 = getStatValue(stats1, "CacheTokens"); + EXPECT_GT(cacheTokens1, 0.0); + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model1->process(saveInput); + }); + + model1.reset(); + + auto model2 = createModel(); + if (!model2) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithDifferentTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Now find movies"}, + {"type": "function", "name": "searchMovies", "description": "Search movies", "parameters": {"type": "object", "properties": {"title": {"type": "string"}}, "required": ["title"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model2->process(inputWithDifferentTools); + EXPECT_GE(output2.length(), 0); + }); + + auto statsAfter = model2->runtimeStats(); + double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens"); + EXPECT_GT(cacheTokensAfter, 0.0) << "Cache should have tokens after loading with different tools"; +} + +TEST_F(ToolTokenCacheTest, CacheTokensIncreaseWithMultiplePrompts) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::vector prompts = { + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "Hello"}, {"type": "function", "name": "search", "description": "Search", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}}])", + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "How are you?"}])", + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "user", "content": "What's the weather?"}])" + }; + + double prevCacheTokens = 0.0; + for (size_t i = 0; i < prompts.size(); ++i) { + EXPECT_NO_THROW({ + std::string output = model->process(prompts[i]); + EXPECT_GE(output.length(), 0); + }); + + auto stats = model->runtimeStats(); + double cacheTokens = getStatValue(stats, "CacheTokens"); + EXPECT_GT(cacheTokens, 0.0) << "Cache should have tokens after prompt " << i + 1; + } +} + +TEST_F(ToolTokenCacheTest, CacheWithNoToolsThenWithTools) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputNoTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Hello, what's the weather?"} + ])"; + + EXPECT_NO_THROW({ + std::string output1 = model->process(inputNoTools); + EXPECT_GE(output1.length(), 0); + }); + + auto stats1 = model->runtimeStats(); + double cacheTokens1 = getStatValue(stats1, "CacheTokens"); + + std::string inputWithTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Now get weather for New York"}, + {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output2 = model->process(inputWithTools); + EXPECT_GE(output2.length(), 0); + }); + + auto stats2 = model->runtimeStats(); + double cacheTokens2 = getStatValue(stats2, "CacheTokens"); + EXPECT_GT(cacheTokens2, 0.0) << "Cache should have tokens after adding tools"; +} + +TEST_F(ToolTokenCacheTest, CacheTokenCountVerification) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "user", "content": "What is the weather?"}, + {"type": "function", "name": "getWeather", "description": "Get weather", "parameters": {"type": "object", "properties": {"city": {"type": "string"}}, "required": ["city"]}} + ])"; + + EXPECT_NO_THROW({ + std::string output = model->process(inputWithTools); + EXPECT_GE(output.length(), 0); + }); + + auto stats = model->runtimeStats(); + double cacheTokens = getStatValue(stats, "CacheTokens"); + double promptTokens = getStatValue(stats, "promptTokens"); + double evalTokens = getStatValue(stats, "evalTokens"); + + std::cout << "=== First prompt with tools ===" << std::endl; + std::cout << "CacheTokens: " << cacheTokens << std::endl; + std::cout << "promptTokens: " << promptTokens << std::endl; + std::cout << "evalTokens: " << evalTokens << std::endl; + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + EXPECT_NO_THROW({ + model->process(saveInput); + }); + + std::string inputAfter = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "And for Paris?"} + ])"; + + EXPECT_NO_THROW({ + std::string output = model->process(inputAfter); + EXPECT_GE(output.length(), 0); + }); + + auto statsAfter = model->runtimeStats(); + double cacheTokensAfter = getStatValue(statsAfter, "CacheTokens"); + double promptTokensAfter = getStatValue(statsAfter, "promptTokens"); + double evalTokensAfter = getStatValue(statsAfter, "evalTokens"); + + std::cout << "=== Second prompt (after cache load) ===" << std::endl; + std::cout << "CacheTokens: " << cacheTokensAfter << std::endl; + std::cout << "promptTokens: " << promptTokensAfter << std::endl; + std::cout << "evalTokens: " << evalTokensAfter << std::endl; + EXPECT_EQ(cacheTokens, 0.0) << "First prompt (no session) has no cache"; + EXPECT_GT(promptTokens, 0.0) << "First prompt should have prompt tokens"; + EXPECT_GT(cacheTokensAfter, 0.0) << "Second prompt should load from cache"; + EXPECT_GT(promptTokensAfter, 0.0) << "Second prompt should have new tokens too"; +} + +TEST_F(ToolTokenCacheTest, CacheWithToolsResetBehavior) { + if (!hasValidModel()) { + GTEST_SKIP() << "Test model not found"; + } + + auto model = createModel(); + if (!model) { + GTEST_SKIP() << "Model failed to load"; + } + + std::string inputWithTools = R"([ + {"role": "user", "content": "Search for laptops"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + model->process(inputWithTools); + }); + + auto statsBefore = model->runtimeStats(); + double cacheBefore = getStatValue(statsBefore, "CacheTokens"); + std::cout << "=== After first tool prompt ===" << std::endl; + std::cout << "CacheTokens: " << cacheBefore << std::endl; + + std::string saveInput = + R"([{"role": "session", "content": "test_session_with_tools.bin"}, {"role": "session", "content": "save"}])"; + model->process(saveInput); + + std::string inputSameTools = R"([ + {"role": "session", "content": "test_session_with_tools.bin"}, + {"role": "user", "content": "Search for phones"}, + {"type": "function", "name": "searchProducts", "description": "Search products", "parameters": {"type": "object", "properties": {"query": {"type": "string"}}, "required": ["query"]}} + ])"; + + EXPECT_NO_THROW({ + model->process(inputSameTools); + }); + + auto statsSameTools = model->runtimeStats(); + double cacheSameTools = getStatValue(statsSameTools, "CacheTokens"); + std::cout << "=== After second prompt with same tools ===" << std::endl; + std::cout << "CacheTokens: " << cacheSameTools << std::endl; + EXPECT_GT(cacheSameTools, 0.0); +}