diff --git a/packages/sdk/bun.lock b/packages/sdk/bun.lock
index 15c5d13f88..91cef8802c 100644
--- a/packages/sdk/bun.lock
+++ b/packages/sdk/bun.lock
@@ -1,6 +1,5 @@
 {
   "lockfileVersion": 1,
-  "configVersion": 1,
   "workspaces": {
     "": {
       "name": "@qvac/sdk",
@@ -10,7 +9,7 @@
         "@qvac/embed-llamacpp": "^0.14.0",
         "@qvac/error": "^0.1.1",
         "@qvac/langdetect-text": "^0.1.2",
-        "@qvac/llm-llamacpp": "^0.16.0",
+        "@qvac/llm-llamacpp": "^0.17.0",
         "@qvac/logging": "^0.1.0",
         "@qvac/ocr-onnx": "^0.4.2",
         "@qvac/rag": "^0.4.4",
@@ -488,7 +487,7 @@
 
     "@qvac/langdetect-text": ["@qvac/langdetect-text@0.1.2", "", { "dependencies": { "tinyld": "1.3.4" } }, "sha512-V6ntqPNBmz+49eIaY8jYdpgyx8MzSk9/bNp9ibSn+Xwx1D/8Mca8RNfn7/gHWsuACMvkvvJmNzZGGLu1eOW3og=="],
 
-    "@qvac/llm-llamacpp": ["@qvac/llm-llamacpp@0.16.0", "", { "dependencies": { "@qvac/infer-base": "^0.4.0", "@qvac/logging": "^0.1.0", "bare-fs": "^4.5.1", "bare-path": "^3.0.0", "bare-process": "^4.2.2" } }, "sha512-BJTEVnzsx1uhUpyPp3nTOLDBk3LQZQN1D2xfY4Hi0o6WTGbIoS/YGfxNG57rAVt9njzk5RI1QH8EDUhbdke3AA=="],
+    "@qvac/llm-llamacpp": ["@qvac/llm-llamacpp@0.17.0", "", { "dependencies": { "@qvac/infer-base": "^0.4.0", "@qvac/logging": "^0.1.0", "bare-fs": "^4.5.1", "bare-path": "^3.0.0", "bare-process": "^4.2.2" } }, "sha512-RGUByLxLGXGfHilJYpksOMELMBPg5HBEG0vhX0Exc1oNvJpJ2zR+jOyGYW1dbNOBoyADmIM+Ss/7KniLKNmAeQ=="],
 
     "@qvac/logging": ["@qvac/logging@0.1.0", "", {}, "sha512-B9JayZKJGzSsM/9JmMdO7wiOOZ2mY5aWTbXl2aIKzy+l2Uqzkoby0IxMjKSVtYo6uMDs2zcrZqLtjI2dDSaYog=="],
 
diff --git a/packages/sdk/examples/llamacpp-dynamic-tools.ts b/packages/sdk/examples/llamacpp-dynamic-tools.ts
new file mode 100644
index 0000000000..3db1a1b236
--- /dev/null
+++ b/packages/sdk/examples/llamacpp-dynamic-tools.ts
@@ -0,0 +1,343 @@
+import { z } from "zod";
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolInput,
+  type ToolCall,
+  type CompletionStats,
+  type CompletionParams,
+  QWEN3_1_7B_INST_Q4,
+} from "@qvac/sdk";
+
+// Define Zod schemas for tool parameters
+const weatherSchema = z.object({
+  city: z.string().describe("City name"),
+});
+
+const horoscopeSchema = z.object({
+  sign: z.string().describe("An astrological sign like Taurus or Aquarius"),
+});
+
+// Map tool names to their schemas for runtime validation
+const toolSchemas = {
+  get_weather: weatherSchema,
+  get_horoscope: horoscopeSchema,
+};
+
+// Simple tool definitions - just name, description, and Zod schema!
+const tools1 = [
+  {
+    name: "get_weather",
+    description: "Get current weather for a city",
+    parameters: weatherSchema,
+  },
+];
+
+const tools2 = [
+  {
+    name: "get_horoscope",
+    description: "Get today's horoscope for an astrological sign",
+    parameters: horoscopeSchema,
+  },
+];
+
+const tools3 = [ // third tool variant: a single parameterless tool
+  {
+    name: "get_date",
+    description: "Get today's Date",
+    parameters: z.object({}), // explicit empty shape: get_date takes no arguments
+  },
+];
+
+type ChatSessionParam = CompletionParams & { // chatSession input: completion params plus the tools offered for this turn
+  tools: ToolInput[]
+}
+async function chatSession ({ modelId, history, tools, kvCache }: ChatSessionParam) { // one turn: stream reply, simulate tools, stream follow-up
+  const result = completion({ modelId, history, kvCache, stream: true, tools });
+
+  // Print generated tokens as they arrive
+  const tokensTask = (async () => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  })();
+
+  // Log tool-call and tool-error events as they arrive
+  const toolsTask = (async () => {
+    for await (const evt of result.toolCallStream) {
+      if (evt.type === "toolCall") {
+        console.log(
+          `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+        );
+        console.log(`   ID: ${evt.call.id}`);
+      } else if (evt.type === "toolError") {
+        console.warn(`\n⚠️  Tool Error: ${evt.error.message}`);
+        console.warn(`   Code: ${evt.error.code}`);
+      }
+    }
+  })();
+
+  await Promise.all([tokensTask, toolsTask]); // both streams must be drained before reading the final results
+
+  const stats: CompletionStats | undefined = await result.stats;
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  if (toolCalls.length > 0) {
+    console.log("\n\n📋 Parsed Tool Calls:");
+    for (const call of toolCalls) {
+      console.log(`  - ${call.name}(${JSON.stringify(call.arguments)})`);
+
+      const schema = toolSchemas[call.name as keyof typeof toolSchemas]; // undefined for tools without a registered schema
+      if (schema) {
+        const validated = schema.safeParse(call.arguments);
+        if (validated.success) {
+          console.log(`    ✓ Arguments validated with Zod`);
+        } else {
+          console.log(`    ✗ Validation failed:`, validated.error);
+        }
+      }
+    }
+  } else {
+    history.push({ // plain answer: record it in the shared history and finish the turn
+      role: "assistant",
+      content: await result.text,
+    });
+    console.log("\n📊 <NO TOOL CALLS FOUND> Performance Stats:", stats);
+    return;
+  }
+
+  console.log("\n📊 <WITH TOOLS> Performance Stats:", stats);
+
+  // Execute tool calls and send results back to the model
+  if (toolCalls.length > 0) { // always true here: the no-tool-call path returned above
+    console.log("\n\n🔧 Simulating Tool Execution...");
+
+    // Simulate tool execution (in a real app, you'd call actual APIs)
+    const toolResults = toolCalls.map((call) => {
+      let result = call.name === "get_date" ? new Date().toDateString() : ""; // get_date takes no arguments; unknown tools yield ""
+      if (call.name === "get_weather") {
+        const args = call.arguments as { city: string; country?: string };
+        result = `The weather in ${args.city} is rainy, 08°C with heavy clouds.`;
+      } else if (call.name === "get_horoscope") {
+        const args = call.arguments as { sign: string };
+        result = `Horoscope for ${args.sign}: Today is a great day for new beginnings and creative endeavors!`;
+      }
+      console.log(`  ✓ ${call.name}: ${result}`);
+      return { toolCallId: call.id, result };
+    });
+
+    // Record the assistant message that requested the tools (raw model text)
+    history.push({
+      role: "assistant",
+      content: await result.text,
+    });
+
+    // Add tool results as tool messages
+    for (const toolResult of toolResults) {
+      history.push({
+        role: "tool",
+        content: toolResult.result,
+      });
+    }
+  }
+
+  // Ask the model to continue now that the tool results are in the history
+  console.log("\n\n🤖 Follow-up Response with Tool Results:");
+  const followUpResult = completion({
+    modelId,
+    history,
+    stream: true,
+    kvCache,
+    tools,
+  });
+
+  for await (const token of followUpResult.tokenStream) {
+    process.stdout.write(token);
+  }
+
+  history.push({
+    role: "assistant",
+    content: await followUpResult.text,
+  });
+
+  const followUpStats = await followUpResult.stats;
+  console.log("\n\n📊 Follow-up Stats:", followUpStats);
+}
+
+type ToolInvocationParam = Pick<CompletionParams, 'kvCache'> & { // kvCache setting plus exactly three tool sets
+  toolVariants: [ToolInput[], ToolInput[], ToolInput[]]
+}
+export async function runToolInvocationTest({ kvCache, toolVariants }: ToolInvocationParam) { // five-turn chat that switches tool sets between turns
+  try {
+    // Load the model with tools support enabled in dynamic mode
+    const modelId = await loadModel({
+      modelSrc: QWEN3_1_7B_INST_Q4,
+      modelType: "llm",
+      modelConfig: {
+        ctx_size: 4096,
+        tools: true, // Enable tools support
+        toolsMode: 'dynamic',
+      },
+      onProgress: (progress) =>
+        console.log(`Loading: ${progress.percentage.toFixed(1)}%`),
+    });
+    console.log(`✅ Model loaded successfully! Model ID: ${modelId}`);
+
+    // Create conversation history (chatSession appends to this same array)
+    const history = [
+      {
+        role: "system",
+        content:"You are a helpful assistant that can use tools. User's personal info: cat name is Windy and dog is Butch",
+      },
+      {
+        role: "user",
+        content: "What's the weather in Tokyo?",
+      },
+    ];
+
+    console.log("\n🤖 AI Response:");
+    console.log("(Streaming with tool definitions in prompt)\n");
+
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+
+    history.push({
+      role: "user",
+      content: "What is my cat name?",
+    })
+
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+
+    history.push({
+      role: "user",
+      content: "What's my dog name?",
+    })
+
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+
+    history.push({
+      role: "user",
+      content: "What is the weather in Tokyo?",
+    })
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[2], kvCache }) // same question, but with the third tool set
+
+    history.push({
+      role: "user",
+      content: "only in case the weather in Tokyo is rainy, check my horoscope for Aquarius; if the weather is good - check Taurus; need only one horoscope depending on the weather",
+    })
+
+    console.log("\n🤖 AI Response:");
+    console.log("(Streaming with tool definitions in prompt)\n");
+
+    await chatSession({ modelId, history, tools: toolVariants[1], kvCache }) // second tool set for the final turn
+
+    console.log("\n\n🎉 Completed!");
+    await unloadModel({ modelId, clearStorage: false });
+  } catch (error) {
+    console.error("❌ Error:", error);
+    process.exit(1); // any failure aborts the example with a non-zero exit code
+  }
+}
+
+export async function runToolInvocationContTest({ kvCache, toolVariants }: ToolInvocationParam) { // resumes a chat from a pre-seeded tool-call history
+  try {
+    // Load the model with tools support enabled in dynamic mode
+    const modelId = await loadModel({
+      modelSrc: QWEN3_1_7B_INST_Q4,
+      modelType: "llm",
+      modelConfig: {
+        ctx_size: 4096,
+        tools: true, // Enable tools support
+        toolsMode: 'dynamic',
+      },
+      onProgress: (progress) =>
+        console.log(`Loading: ${progress.percentage.toFixed(1)}%`),
+    });
+    console.log(`✅ Model loaded successfully! Model ID: ${modelId}`);
+
+    // Pre-seeded history: a user question, the assistant's raw tool call, and the tool's answer
+    const history = [
+      {
+        role: "system",
+        content:"You are a helpful assistant that can use tools. User's cat name is Windy and dog is Butch",
+      },
+      {
+        role: "user",
+        content: "What's the weather in Tokyo?",
+      },
+      {
+        role: "assistant",
+        content: `
+<think>
+Okay, the user is asking about the weather in Tokyo. I need to check if there's a function available to get the weather. Looking at the tools provided, there's a function called get_weather that takes a city name as a parameter. The city here is Tokyo. So I should call that function with the city parameter set to "Tokyo". Let me make sure I format the JSON correctly within the tool_call tags.
+</think>
+<tool_call>
+{"name": "get_weather", "arguments": {"city": "Tokyo"}}
+</tool_call>
+`,
+      },
+      {
+        role: "tool",
+        content: "The weather in Tokyo is rainy, 08°C with heavy clouds.",
+      },
+      /*
+      {
+        role: "assistant",
+        content: `
+<think>
+Okay, the user asked about the weather in Tokyo. I used the get_weather function and found out it's rainy with 08°C and heavy clouds. Now I need to respond appropriately. Let me check the details again. The function response says rainy, 08°C, heavy clouds. I should present this info clearly. Maybe mention the current conditions and temperature. Make sure it's friendly and helpful. Alright, the response should be something like informing the user about the weather and offering further help if needed.
+</think>
+The weather in Tokyo is currently rainy with clouds and a temperature of 08°C. Let me know if you need further weather updates or assistance! 🌧️
+`,
+      },
+      {
+        role: "user",
+        content: "What is my cat name?",
+      },
+      */
+    ];
+
+    /*
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+    */
+
+    history.push({
+      role: "user",
+      content: "What's my dog name?",
+    })
+
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache })
+
+    history.push({
+      role: "user",
+      content: "What is the weather in Tokyo?",
+    })
+    console.log("\n🤖 AI Response:");
+    await chatSession({ modelId, history, tools: toolVariants[2], kvCache }) // third tool set for this turn
+
+    history.push({
+      role: "user",
+      content: "only in case the weather in Tokyo is rainy, check my horoscope for Aquarius; if the weather is good - check Taurus; need only one horoscope depending on the weather",
+    })
+
+    console.log("\n🤖 AI Response:");
+    console.log("(Streaming with tool definitions in prompt)\n");
+
+    await chatSession({ modelId, history, tools: toolVariants[1], kvCache }) // second tool set for the final turn
+
+    console.log("\n\n🎉 Completed!");
+    await unloadModel({ modelId, clearStorage: false });
+  } catch (error) {
+    console.error("❌ Error:", error);
+    process.exit(1); // any failure aborts the example with a non-zero exit code
+  }
+}
+// Use the same kvCache value for every turn of a single session
+// await runToolInvocationTest({ kvCache: false, toolVariants: [tools1, tools2, tools3] })
+await runToolInvocationTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2, tools3] })
+// await runToolInvocationContTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2, tools3] })
diff --git a/packages/sdk/examples/llamacpp-native-tools.ts b/packages/sdk/examples/llamacpp-native-tools.ts
index 6e516fb768..004996cc83 100644
--- a/packages/sdk/examples/llamacpp-native-tools.ts
+++ b/packages/sdk/examples/llamacpp-native-tools.ts
@@ -80,10 +80,15 @@ try {
   // Consume tool call events
   const toolsTask = (async () => {
     for await (const evt of result.toolCallStream) {
-      console.log(
-        `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
-      );
-      console.log(`   ID: ${evt.call.id}`);
+      if (evt.type === "toolCall") {
+        console.log(
+          `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+        );
+        console.log(`   ID: ${evt.call.id}`);
+      } else if (evt.type === "toolError") {
+        console.warn(`\n⚠️  Tool Error: ${evt.error.message}`);
+        console.warn(`   Code: ${evt.error.code}`);
+      }
     }
   })();
 
@@ -122,7 +127,7 @@ try {
       let result = "";
       if (call.name === "get_weather") {
         const args = call.arguments as { city: string; country?: string };
-        result = `The weather in ${args.city} is sunny, 22°C with light clouds.`;
+        result = `The weather in ${args.city} is rainy, 02°C with heavy clouds.`;
       } else if (call.name === "get_horoscope") {
         const args = call.arguments as { sign: string };
         result = `Horoscope for ${args.sign}: Today is a great day for new beginnings and creative endeavors!`;
diff --git a/packages/sdk/index.ts b/packages/sdk/index.ts
index 2bbb88d2f4..f4740bfcc7 100644
--- a/packages/sdk/index.ts
+++ b/packages/sdk/index.ts
@@ -124,6 +124,7 @@ export {
   type FinetuneProgress,
   type FinetuneStats,
   type FinetuneResult,
+  ToolsModeType,
 } from "./schemas";
 
 export { type ToolInput, type ToolHandler } from "./utils/tool-helpers";
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index 147552a2d2..b03adb3bbf 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -177,7 +177,7 @@
     "@qvac/embed-llamacpp": "^0.14.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text": "^0.1.2",
-    "@qvac/llm-llamacpp": "^0.16.0",
+    "@qvac/llm-llamacpp": "^0.17.0",
     "@qvac/logging": "^0.1.0",
     "@qvac/ocr-onnx": "^0.4.2",
     "@qvac/rag": "^0.4.4",
diff --git a/packages/sdk/schemas/completion-event.ts b/packages/sdk/schemas/completion-event.ts
index 2f59a5c151..8a0c5b5ff1 100644
--- a/packages/sdk/schemas/completion-event.ts
+++ b/packages/sdk/schemas/completion-event.ts
@@ -11,6 +11,8 @@ export const completionStatsSchema = z.object({
   tokensPerSecond: z.number().optional(),
   cacheTokens: z.number().optional(),
   backendDevice: z.enum(["cpu", "gpu"]).optional(),
+  promptTokens: z.number().optional(),
+  generatedTokens: z.number().optional(),
 });
 
 export type CompletionStats = z.infer<typeof completionStatsSchema>;
@@ -138,7 +140,7 @@ export type CompletionRun = {
   final: Promise<CompletionFinal>;
 
   tokenStream: AsyncGenerator<string>;
-  toolCallStream: AsyncGenerator<ToolCallEvent>;
+  toolCallStream: AsyncGenerator<ToolCallEvent | ToolErrorEvent>;
   text: Promise<string>;
   toolCalls: Promise<ToolCallWithCall[]>;
   stats: Promise<CompletionStats | undefined>;
diff --git a/packages/sdk/schemas/index.ts b/packages/sdk/schemas/index.ts
index 6acb6cccaa..d61f589f7a 100644
--- a/packages/sdk/schemas/index.ts
+++ b/packages/sdk/schemas/index.ts
@@ -7,6 +7,7 @@ export {
   toolSchema,
   toolCallSchema,
   toolCallErrorSchema,
+  ToolsModeType,
   type Tool,
   type ToolCall,
   type ToolCallError,
diff --git a/packages/sdk/schemas/llamacpp-config.ts b/packages/sdk/schemas/llamacpp-config.ts
index 89e65a99dc..6a6f167f3f 100644
--- a/packages/sdk/schemas/llamacpp-config.ts
+++ b/packages/sdk/schemas/llamacpp-config.ts
@@ -1,5 +1,6 @@
 import { z } from "zod";
 import { modelSrcInputSchema } from "./model-src-utils";
+import { ToolsModeType } from "./tools";
 
 export const VERBOSITY = {
   ERROR: 0,
@@ -41,6 +42,9 @@ export const llmConfigBaseSchema = z.object({
   stop_sequences: z.array(z.string()).optional(),
   n_discarded: z.number().optional(),
   tools: z.boolean().optional(),
+  toolsMode: z.enum([ToolsModeType.static, ToolsModeType.dynamic])
+    .describe('"static" is a default way where tools belong to a chat session; "dynamic" mode allows to provide specific tools for each user prompt')
+    .optional(),
   "cache-type-k": z.string().optional(),
   "cache-type-v": z.string().optional(),
   /**
@@ -59,6 +63,7 @@ export const LLM_CONFIG_DEFAULTS = {
   gpu_layers: 99,
   device: "gpu",
   system_prompt: "You are a helpful assistant.",
+  toolsMode: ToolsModeType.static,
 } as const satisfies Partial<LlmConfigInput>;
 
 // Full schema - applies defaults via transform (no duplication)
diff --git a/packages/sdk/schemas/tools.ts b/packages/sdk/schemas/tools.ts
index 8b028f3018..0fe515627e 100644
--- a/packages/sdk/schemas/tools.ts
+++ b/packages/sdk/schemas/tools.ts
@@ -1,5 +1,15 @@
 import { z } from "zod";
 
+/**
+ * In "static" mode one set of tools is provided for the whole chat session;
+ * in "dynamic" mode tools are attached to each message,
+ * so in a long conversation the next message can have its own tools.
+ */
+export const ToolsModeType = {
+  static: "static",
+  dynamic: "dynamic",
+} as const;
+
 export const toolSchema = z.object({
   type: z.literal("function"),
   name: z.string(),
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/ops/completion-stream.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/ops/completion-stream.ts
index 304eb2069e..eacd5d72d0 100644
--- a/packages/sdk/server/bare/plugins/llamacpp-completion/ops/completion-stream.ts
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/ops/completion-stream.ts
@@ -6,7 +6,7 @@ import type {
   Tool,
   ToolCall,
 } from "@/schemas";
-import type { ToolCallEvent } from "@/schemas/tools";
+import { type ToolCallEvent, ToolsModeType } from "@/schemas/tools";
 import {
   logCacheDisabled,
   logCacheInit,
@@ -231,31 +231,84 @@ async function initSystemPromptCache(
   await primeResponse.await();
 }
 
+type HistoryMsg = {
+  role: string;
+  content: string;
+  attachments?: { path: string }[] | undefined;
+}
+
+/**
+ * Dynamic tools (anchored) cache contract:
+ *
+ * Round 1 (cache miss): send full history + tools.
+ *   The addon template anchors tools after the last user message.
+ *   After generation, tools stay in cache if model called a tool.
+ *
+ * Round 2+ (chain continuation, last msg is role "tool"):
+ *   Send ONLY the new tool response(s). The assistant message and
+ *   tools are already in the KV cache from previous rounds.
+ *   Tools are NOT re-sent — the addon preserves them at the anchor.
+ *
+ * New turn (last msg is role "user"):
+ *   Send assistant + user messages + tools. A new anchor is created.
+ *
+ * The app must keep <think> blocks and raw <tool_call> XML in
+ * assistant content during the chain (see agentic-tools.ts for the
+ * full contract).
+ */
 function prepareMessagesForCache(
   cachePathToUse: string,
   cacheExists: boolean,
-  history: {
-    role: string;
-    content: string;
-    attachments?: { path: string }[] | undefined;
-  }[],
+  history: HistoryMsg[],
+  tools?: Tool[], // callers in this file pass tools only in dynamic mode
+  toolsMode?: string // compared against ToolsModeType.dynamic below
 ): ChatHistory[] {
+  const addTools = tools?.length ? transformMessages(tools) : []; // empty when no tools were passed
   if (cacheExists && history.length > 0) {
     const savedCount = cachedMessageCounts.get(cachePathToUse) ?? 0;
-    const canSlice = savedCount > 0 && savedCount <= history.length;
-    const newMessages = canSlice
-      ? history.slice(savedCount)
-      : history.filter((msg) => msg.role !== "system");
-
-    if (!canSlice && savedCount > 0) {
+    if (savedCount > 0 && savedCount <= history.length) { // NOTE(review): removed code deleted only a stale count (!canSlice); this deletes an in-range one — confirm intended
       cachedMessageCounts.delete(cachePathToUse);
     }
 
-    return transformMessages(newMessages);
+    const lastMsg = history[history.length - 1] as HistoryMsg; // safe: history.length > 0 was checked above
+    const isToolChainContinuation = lastMsg.role === 'tool';
+    const isUserPrompt = lastMsg.role === 'user';
+    let lastMessages: HistoryMsg[]; // the only messages sent when a cache exists; earlier history is not re-sent
+    if (isToolChainContinuation) {
+      // Collect all consecutive tool responses from the end of history.
+      lastMessages = [];
+      for (let i = history.length - 1; i >= 0; i--) {
+        if ((history[i] as HistoryMsg).role === 'tool') {
+          lastMessages.unshift(history[i] as HistoryMsg); // unshift keeps the original message order
+        } else {
+          break;
+        }
+      }
+    } else if (isUserPrompt && toolsMode === ToolsModeType.dynamic) {
+      const prevMsg = history[history.length - 2]; // undefined when history has a single message
+      if (prevMsg?.role === 'assistant') {
+        lastMessages = [prevMsg, lastMsg]; // new dynamic-mode turn: resend the preceding assistant reply with the user message
+      } else {
+        lastMessages = [lastMsg];
+      }
+    } else {
+      lastMessages = [lastMsg]; // static mode, or a last message that is neither "tool" nor "user"
+    }
+    const lastTransformedMessages = transformMessages(lastMessages);
+    return [
+      ...lastTransformedMessages,
+      ...(isToolChainContinuation ? [] : addTools), // tools are not re-sent while a tool chain continues
+    ];
   }
 
   const historyWithoutSystem = history.filter((msg) => msg.role !== "system");
-  return transformMessages(historyWithoutSystem);
+  const transformedHistoryWithoutSystem =
+    transformMessages(historyWithoutSystem);
+
+  return [ // no usable cache: send every non-system message, followed by the tools (if any)
+    ...transformedHistoryWithoutSystem,
+    ...addTools,
+  ];
 }
 
 type CacheRunOptions = Pick<RunOptions, "cacheKey" | "saveCacheToDisk">;
@@ -364,6 +417,7 @@ export async function* completion(
 
   const modelConfig = getModelConfig(modelId);
   const toolsEnabled = (modelConfig as { tools?: boolean }).tools === true;
+  const toolsMode = (modelConfig as { toolsMode?: string }).toolsMode;
 
   let historyWithTools: Array<
     | {
@@ -375,7 +429,11 @@ export async function* completion(
   > = history;
 
   if (tools && tools.length > 0 && toolsEnabled) {
-    historyWithTools = insertToolsIntoHistory(history, tools);
+    historyWithTools = insertToolsIntoHistory({
+      history,
+      tools,
+      append: toolsMode === ToolsModeType.dynamic,
+    });
     setupToolGrammar(modelConfig as Record<string, unknown>, tools);
   }
 
@@ -385,7 +443,13 @@ export async function* completion(
   if (kvCache) {
     const modelConfig = getModelConfig(modelId);
     const systemPromptFromHistory = extractSystemPrompt(history);
-    const configHash = generateConfigHash(systemPromptFromHistory, tools);
+    const toolsModeForHash = (modelConfig as { toolsMode?: string }).toolsMode;
+    const systemTools = !!(toolsMode !== ToolsModeType.dynamic && tools?.length && toolsEnabled);
+    const dynamicTools = !!(toolsMode === ToolsModeType.dynamic && tools?.length && toolsEnabled);
+    const configHash = generateConfigHash(
+      systemPromptFromHistory,
+      toolsModeForHash !== ToolsModeType.dynamic ? tools : undefined,
+    );
 
     const systemPromptToUse =
       systemPromptFromHistory ||
@@ -396,7 +460,7 @@ export async function* completion(
 
     if (typeof kvCache === "string") {
       cachePathToUse = await getCacheFilePath(modelId, configHash, kvCache);
-      let cacheExists = await customCacheExists(modelId, configHash, kvCache);
+      const cacheExists = await customCacheExists(modelId, configHash, kvCache);
       logCacheStatus(kvCache, cacheExists);
 
       if (!cacheExists) {
@@ -405,16 +469,17 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           kvCache,
-          tools && toolsEnabled ? tools : undefined,
+          systemTools ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, kvCache);
-        cacheExists = true;
       }
 
       const messagesToSend = prepareMessagesForCache(
         cachePathToUse,
         cacheExists,
         history,
+        dynamicTools ? tools : undefined,
+        toolsMode,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 
@@ -460,7 +525,7 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           "auto",
-          tools && toolsEnabled ? tools : undefined,
+          systemTools ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, preResponseCacheInfo.cacheKey);
         cacheExists = true;
@@ -470,6 +535,8 @@ export async function* completion(
         cachePathToUse,
         cacheExists,
         history,
+        dynamicTools ? tools : undefined,
+        toolsMode,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
index 6cf51143fd..704e0e0c0d 100644
--- a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
@@ -12,6 +12,7 @@ import {
   ModelType,
   llmConfigBaseSchema,
   ADDON_LLM,
+  ToolsModeType,
   type CompletionEvent,
   type CreateModelParams,
   type PluginCapabilities,
@@ -58,6 +59,12 @@ function transformLlmConfig(llmConfig: LlmConfig) {
     delete transformed["opencl_cache_dir"];
   }
 
+  if ("tools_mode" in transformed) {
+    transformed["tools_compact"] =
+      transformed["tools_mode"] === ToolsModeType.dynamic ? "true" : "false";
+    delete transformed["tools_mode"];
+  }
+
   return transformed;
 }
 
diff --git a/packages/sdk/server/utils/tool-integration.ts b/packages/sdk/server/utils/tool-integration.ts
index 6dff8e7fe5..edd4f445b6 100644
--- a/packages/sdk/server/utils/tool-integration.ts
+++ b/packages/sdk/server/utils/tool-integration.ts
@@ -11,10 +11,14 @@ interface HistoryMessage {
   attachments?: { path: string }[] | undefined;
 }
 
-export function insertToolsIntoHistory(
+export function insertToolsIntoHistory({ history, tools, append = false }: {
   history: HistoryMessage[],
   tools: Tool[],
-): Array<HistoryMessage | Tool> {
+  append?: boolean,
+}): Array<HistoryMessage | Tool> {
+  if (append) {
+    return [...history, ...tools];
+  }
   const systemMsgIndex = history.findIndex((msg) => msg.role === "system");
 
   if (systemMsgIndex >= 0) {
diff --git a/packages/sdk/tests-qvac/tests/desktop/consumer.ts b/packages/sdk/tests-qvac/tests/desktop/consumer.ts
index 3aa5068019..41b2bd3dad 100644
--- a/packages/sdk/tests-qvac/tests/desktop/consumer.ts
+++ b/packages/sdk/tests-qvac/tests/desktop/consumer.ts
@@ -120,6 +120,12 @@ resources.define("tools", {
   config: { ctx_size: 4096, tools: true },
 });
 
+resources.define("tools-dynamic", {
+  constant: QWEN3_1_7B_INST_Q4,
+  type: "llm",
+  config: { ctx_size: 4096, tools: true, toolsMode: 'dynamic' },
+});
+
 resources.define("ocr", {
   constant: OCR_LATIN_RECOGNIZER_1,
   type: "ocr",
diff --git a/packages/sdk/tests-qvac/tests/mobile/consumer.ts b/packages/sdk/tests-qvac/tests/mobile/consumer.ts
index 016a93adc1..9542f9287d 100644
--- a/packages/sdk/tests-qvac/tests/mobile/consumer.ts
+++ b/packages/sdk/tests-qvac/tests/mobile/consumer.ts
@@ -111,6 +111,12 @@ resources.define("tools", {
   config: { ctx_size: 4096, tools: true },
 });
 
+resources.define("tools-dynamic", {
+  constant: QWEN3_1_7B_INST_Q4,
+  type: "llm",
+  config: { ctx_size: 4096, tools: true, toolsMode: 'dynamic' },
+});
+
 resources.define("ocr", {
   constant: OCR_LATIN_RECOGNIZER_1,
   type: "ocr",
diff --git a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
index 59855f8138..2c80288f49 100644
--- a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
+++ b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
@@ -1,11 +1,11 @@
-import { completion } from "@qvac/sdk";
+import { completion, ToolsModeType } from "@qvac/sdk";
 import {
   ValidationHelpers,
   type TestResult,
   type Expectation,
 } from "@tetherto/qvac-test-suite";
 import { AbstractModelExecutor } from "./abstract-model-executor.js";
-import { toolsTests } from "../../tools-tests.js";
+import { toolsTests, type ToolsExpectation } from "../../tools-tests.js";
 
 export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
   pattern = /^tools-/;
@@ -14,7 +14,7 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
     toolsTests.map((test) => [test.testId, this.generic.bind(this)]),
   ) as never;
 
-  async generic(params: unknown, expectation: unknown): Promise<TestResult> {
+  async generic(params: unknown, expectation: ToolsExpectation): Promise<TestResult> {
     const p = params as {
       history: Array<{ role: string; content: string }>;
       tools: Array<{
@@ -23,9 +23,11 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
         description: string;
         parameters: Record<string, unknown>;
       }>;
+      toolsMode: string,
       stream?: boolean;
     };
-    const toolsModelId = await this.resources.ensureLoaded("tools");
+    const resourceDep = p.toolsMode  === ToolsModeType.dynamic ? "tools-dynamic" : "tools"
+    const toolsModelId = await this.resources.ensureLoaded(resourceDep);
 
     try {
       const result = completion({
@@ -38,11 +40,27 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
       const text = await result.text;
       const toolCalls = result.toolCalls ? await result.toolCalls : undefined;
 
-      const resultData =
-        text ||
-        (toolCalls && toolCalls.length > 0 ? "tool call made" : "no response");
+      if (expectation.validation === "type") {
+        const resultData =
+          text ||
+          (toolCalls && toolCalls.length > 0 ? "tool call made" : "no response");
+        return ValidationHelpers.validate(resultData, expectation as Expectation);
+      }
 
-      return ValidationHelpers.validate(resultData, expectation as Expectation);
+      if (expectation.validation === "custom") {
+        const passed = expectation.validator({ toolCalls, text });
+        return {
+          passed,
+          output: passed
+            ? "custom validator passed"
+            : "custom validator returned false",
+        };
+      }
+
+      return {
+        passed: false,
+        output: `Unhandled validation type: ${(expectation as { validation: string }).validation}`,
+      };
     } catch (error) {
       const errorMsg = error instanceof Error ? error.message : String(error);
       return { passed: false, output: `Tools test failed: ${errorMsg}` };
diff --git a/packages/sdk/tests-qvac/tests/tools-tests.ts b/packages/sdk/tests-qvac/tests/tools-tests.ts
index c669414b49..d08c4b7ef1 100644
--- a/packages/sdk/tests-qvac/tests/tools-tests.ts
+++ b/packages/sdk/tests-qvac/tests/tools-tests.ts
@@ -1,5 +1,29 @@
 // Tools/Function calling test definitions
-import type { TestDefinition } from "@tetherto/qvac-test-suite";
+import type { Expectation, TestDefinition } from "@tetherto/qvac-test-suite";
+// Slice of a completion result handed to custom validators: optional tool calls (name only) and optional text.
+type ToolsResult = { toolCalls?: Array<{ name: string }>; text?: string };
+// A tools test accepts either a suite Expectation or a "custom" expectation decided by a validator callback.
+export type ToolsExpectation =
+  | Expectation
+  | { validation: "custom"; validator: (result: ToolsResult) => boolean };
+
+// Custom validator: passes only when the result carries at least one tool call.
+const toolsCalledValidator = (result: ToolsResult) =>
+  (result.toolCalls?.length ?? 0) > 0;
+// Custom expectation for tests where the model is expected to call a tool.
+const toolsCalledExpectation: ToolsExpectation = {
+  validation: "custom",
+  validator: toolsCalledValidator,
+};
+
+// Builds a validator that passes when exactly `numCalls` tool calls were made; an absent toolCalls array counts as zero.
+const toolsNotCalledValidatorEqual = (numCalls: number) => (result: ToolsResult) =>
+  (result.toolCalls?.length ?? 0) === numCalls;
+// Custom expectation built from toolsNotCalledValidatorEqual(0), i.e. a tool-call count of zero.
+const toolsNotCalledExpectation: ToolsExpectation = {
+  validation: "custom",
+  validator: toolsNotCalledValidatorEqual(0),
+};
 
 // Helper for creating tools tests
 const createToolsTest = (
@@ -15,10 +39,8 @@ const createToolsTest = (
       required?: string[];
     };
   }>,
-  expectation: {
-    validation: "type";
-    expectedType: "string" | "number" | "array";
-  } = {
+  toolsMode?: "static" | "dynamic",
+  expectation: ToolsExpectation = {
     validation: "type",
     expectedType: "string",
   },
@@ -29,8 +51,9 @@ const createToolsTest = (
     history: [{ role: "user", content: userPrompt }],
     tools,
     stream: false,
+    ...(toolsMode && { toolsMode }),
   },
-  expectation,
+  expectation: expectation as Expectation,
   ...(suites && { suites }),
   metadata: {
     category: "tools",
@@ -69,6 +92,7 @@ export const toolsSimpleFunction = createToolsTest(
     },
   ],
   undefined,
+  undefined,
   ["smoke"],
 );
 
@@ -103,6 +127,7 @@ export const toolsMultipleFunctions = createToolsTest(
     },
   ],
   undefined,
+  undefined,
   ["smoke"],
 );
 
@@ -185,8 +210,192 @@ const additionalToolsTests: TestDefinition[] = toolsTestIds.map((testId) => ({
   },
 }));
 
+// Basic tool call under each toolsMode: "static" here, "dynamic" below (tests that pass no toolsMode leave it unset).
+export const toolsModeStatic = createToolsTest(
+  "tools-mode-static",
+  "What's 25 degrees Celsius in Fahrenheit?",
+  [
+    {
+      type: "function",
+      name: "convert_temperature",
+      description: "Convert temperature between Celsius and Fahrenheit",
+      parameters: {
+        type: "object",
+        properties: {
+          value: { type: "number", description: "Temperature value" },
+          from_unit: {
+            type: "string",
+            enum: ["celsius", "fahrenheit"],
+            description: "Source unit",
+          },
+          to_unit: {
+            type: "string",
+            enum: ["celsius", "fahrenheit"],
+            description: "Target unit",
+          },
+        },
+        required: ["value", "from_unit", "to_unit"],
+      },
+    },
+  ],
+  "static",
+  toolsCalledExpectation, // passes when at least one tool call is returned
+);
+// Same prompt and convert_temperature tool as the static test, run with toolsMode "dynamic".
+export const toolsModeDynamic = createToolsTest(
+  "tools-mode-dynamic",
+  "What's 25 degrees Celsius in Fahrenheit?",
+  [
+    {
+      type: "function",
+      name: "convert_temperature",
+      description: "Convert temperature between Celsius and Fahrenheit",
+      parameters: {
+        type: "object",
+        properties: {
+          value: { type: "number", description: "Temperature value" },
+          from_unit: {
+            type: "string",
+            enum: ["celsius", "fahrenheit"],
+            description: "Source unit",
+          },
+          to_unit: {
+            type: "string",
+            enum: ["celsius", "fahrenheit"],
+            description: "Target unit",
+          },
+        },
+        required: ["value", "from_unit", "to_unit"],
+      },
+    },
+  ],
+  "dynamic",
+  toolsCalledExpectation, // passes when at least one tool call is returned
+);
+
+// Test multiple tools in dynamic mode (asserts only that some tool is called, not which one)
+export const toolsModeMultipleTools = createToolsTest(
+  "tools-mode-multiple-tools",
+  "Get the weather for London and calculate the time difference with New York",
+  [
+    {
+      type: "function",
+      name: "get_weather",
+      description: "Get current weather for a location",
+      parameters: {
+        type: "object",
+        properties: {
+          location: { type: "string", description: "City name" },
+        },
+        required: ["location"],
+      },
+    },
+    {
+      type: "function",
+      name: "get_time_difference",
+      description: "Calculate time difference between two cities",
+      parameters: {
+        type: "object",
+        properties: {
+          city1: { type: "string" },
+          city2: { type: "string" },
+        },
+        required: ["city1", "city2"],
+      },
+    },
+  ],
+  "dynamic",
+  toolsCalledExpectation,
+);
+
+// Test model declines tool use: a prompt unrelated to the offered tool, in dynamic mode, expecting zero tool calls
+export const toolsModeModelDeclines = createToolsTest(
+  "tools-mode-model-declines",
+  "Tell me a joke about programming",
+  [
+    {
+      type: "function",
+      name: "get_weather",
+      description: "Get current weather for a location",
+      parameters: {
+        type: "object",
+        properties: {
+          location: { type: "string", description: "City name" },
+        },
+        required: ["location"],
+      },
+    },
+  ],
+  "dynamic",
+  toolsNotCalledExpectation,
+);
+
+// Test no tools provided (empty array) in dynamic mode, expecting zero tool calls
+export const toolsModeEmptyArray = createToolsTest(
+  "tools-mode-empty-array",
+  "What's the weather like?",
+  [],
+  "dynamic",
+  toolsNotCalledExpectation,
+);
+
+// Test single tool in dynamic mode: only get_time is offered and at least one tool call is expected
+export const toolsModeSingleTool = createToolsTest(
+  "tools-mode-single-tool",
+  "Get the current time in Tokyo",
+  [
+    {
+      type: "function",
+      name: "get_time",
+      description: "Get current time for a location",
+      parameters: {
+        type: "object",
+        properties: {
+          location: { type: "string", description: "City name" },
+        },
+        required: ["location"],
+      },
+    },
+  ],
+  "dynamic",
+  toolsCalledExpectation,
+);
+
+// Large tool set: 12 generated function tools (tool_function_0..tool_function_11), each with one required string `param`
+const largeToolSet = Array.from({ length: 12 }, (_, i) => ({
+  type: "function" as const,
+  name: `tool_function_${i}`,
+  description: `Test function number ${i}`,
+  parameters: {
+    type: "object" as const,
+    properties: {
+      param: { type: "string", description: `Parameter for function ${i}` },
+    },
+    required: ["param"],
+  },
+}));
+// Dynamic-mode test over the 12-tool set; the prompt names tool 5, but only "some tool was called" is asserted.
+export const toolsModeLargeToolSet = createToolsTest(
+  "tools-mode-large-tool-set",
+  "Execute tool number 5",
+  largeToolSet,
+  "dynamic",
+  toolsCalledExpectation,
+);
+// All tests that run with toolsMode "dynamic", grouped so they can be spread into toolsTests.
+export const dynamicToolsTests = [
+  toolsModeDynamic,
+  toolsModeMultipleTools,
+  toolsModeModelDeclines,
+  toolsModeEmptyArray,
+  toolsModeSingleTool,
+  toolsModeLargeToolSet,
+];
+// Full tools suite: the two base tests, the static-mode test, the generated extras, then the dynamic-mode tests.
 export const toolsTests = [
   toolsSimpleFunction,
   toolsMultipleFunctions,
+  toolsModeStatic,
   ...additionalToolsTests,
+  ...dynamicToolsTests,
 ];