tetherto · mialso · Mar 23, 2026 · Mar 23, 2026 · Mar 24, 2026 · Mar 24, 2026
diff --git a/packages/sdk/README.md b/packages/sdk/README.md
@@ -170,6 +170,7 @@ bun run examples/path/to/example.ts
 - `llama.cpp` with P2P registry: [`examples/llamacpp-p2p.ts`](examples/llamacpp-p2p.ts)
 - `llama.cpp` with HTTP: [`examples/llamacpp-http.ts`](examples/llamacpp-http.ts)
 - `llama.cpp` with tools/function calls: [`examples/llamacpp-native-tools.ts`](examples/llamacpp-native-tools.ts)
+- `llama.cpp` with tools/function calls (dynamic mode): [`examples/llamacpp-dynamic-tools.ts`](examples/llamacpp-dynamic-tools.ts)
 - `llama.cpp` with multimodal inference: [`examples/llamacpp-multimodal.ts`](examples/llamacpp-multimodal.ts)
 - `llama.cpp` with KV cache: [`examples/kv-cache-example.ts`](examples/kv-cache-example.ts)
 

@@ -0,0 +1,208 @@
+import { z } from "zod";
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolInput,
+  type ToolCall,
+  type CompletionStats,
+  type CompletionParams,
+  QWEN3_1_7B_INST_Q4,
+} from "@qvac/sdk";
+
+// Zod schemas describing the arguments each tool accepts.
+const weatherSchema = z.object({ city: z.string().describe("City name") });
+
+const horoscopeSchema = z.object({
+  sign: z.string().describe("An astrological sign like Taurus or Aquarius"),
+});
+
+// Lookup table (tool name -> schema) used to validate model-produced arguments at runtime.
+const toolSchemas = { get_weather: weatherSchema, get_horoscope: horoscopeSchema };
+
+// Tool set offered during the first chat turn: a name, a description and a Zod schema.
+const tools1 = [
+  { name: "get_weather", description: "Get current weather for a city", parameters: weatherSchema },
+];
+
+// Tool set offered during the second chat turn.
+const tools2 = [
+  {
+    name: "get_horoscope",
+    description: "Get today's horoscope for an astrological sign",
+    parameters: horoscopeSchema,
+  },
+];
+
+
+/** Parameters for one chat turn: the usual completion parameters plus a required tool list. */
+type ChatSessionParam = CompletionParams & {
+  tools: ToolInput[];
+};
+
+/**
+ * Produces a canned result for a tool call (a real app would call actual APIs here).
+ *
+ * Arguments are checked against the tool's Zod schema instead of being cast, so
+ * malformed model output is reported back rather than interpolated as `undefined`.
+ *
+ * @param call - Tool call emitted by the model.
+ * @returns Text to feed back to the model as the tool's output.
+ */
+function simulateToolCall(call: ToolCall): string {
+  switch (call.name) {
+    case "get_weather": {
+      const parsed = weatherSchema.safeParse(call.arguments);
+      return parsed.success
+        ? `The weather in ${parsed.data.city} is sunny, 22°C with light clouds.`
+        : `Invalid arguments for ${call.name}: ${parsed.error.message}`;
+    }
+    case "get_horoscope": {
+      const parsed = horoscopeSchema.safeParse(call.arguments);
+      return parsed.success
+        ? `Horoscope for ${parsed.data.sign}: Today is a great day for new beginnings and creative endeavors!`
+        : `Invalid arguments for ${call.name}: ${parsed.error.message}`;
+    }
+    default:
+      return `Unknown tool: ${call.name}`;
+  }
+}
+
+/**
+ * Runs one chat turn: streams the model's answer, reports any tool calls, and,
+ * if tools were called, feeds simulated tool results back for a follow-up answer.
+ *
+ * `history` is mutated in place: the assistant reply, the tool results and the
+ * follow-up reply are appended so the caller can continue the same conversation.
+ *
+ * @param modelId - Identifier of the loaded model.
+ * @param history - Conversation so far; extended by this call.
+ * @param tools - Tool definitions offered to the model for this turn.
+ * @param kvCache - KV-cache setting forwarded unchanged to `completion`.
+ */
+async function chatSession({ modelId, history, tools, kvCache }: ChatSessionParam): Promise<void> {
+  const result = completion({ modelId, history, kvCache, stream: true, tools });
+
+  // Drain the token stream and the tool-call event stream concurrently.
+  const tokensTask = (async () => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  })();
+
+  const toolsTask = (async () => {
+    for await (const evt of result.toolCallStream) {
+      if (evt.type === "toolCall") {
+        console.log(
+          `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+        );
+        console.log(`   ID: ${evt.call.id}`);
+      } else if (evt.type === "toolCallError") {
+        console.warn(`\n⚠️  Tool Error: ${evt.error.message}`);
+        console.warn(`   Code: ${evt.error.code}`);
+      }
+    }
+  })();
+
+  await Promise.all([tokensTask, toolsTask]);
+
+  const stats: CompletionStats | undefined = await result.stats;
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  console.log("\n\n📋 Parsed Tool Calls:");
+  if (toolCalls.length > 0) {
+    for (const call of toolCalls) {
+      console.log(`  - ${call.name}(${JSON.stringify(call.arguments)})`);
+
+      const schema = toolSchemas[call.name as keyof typeof toolSchemas];
+      if (schema) {
+        const validated = schema.safeParse(call.arguments);
+        if (validated.success) {
+          console.log(`    ✓ Arguments validated with Zod`);
+        } else {
+          console.log(`    ✗ Validation failed:`, validated.error);
+        }
+      }
+    }
+  } else {
+    console.log("  No tool calls detected in response");
+  }
+
+  console.log("\n📊 Performance Stats:", stats);
+
+  // Record the assistant's reply so later turns see it.
+  history.push({
+    role: "assistant",
+    content: await result.text,
+  });
+
+  // Without tool calls there are no results to send back, so re-running the
+  // completion would only regenerate an answer to the same prompt.
+  if (toolCalls.length === 0) {
+    return;
+  }
+
+  console.log("\n\n🔧 Simulating Tool Execution...");
+
+  const toolResults = toolCalls.map((call) => {
+    const output = simulateToolCall(call);
+    console.log(`  ✓ ${call.name}: ${output}`);
+    return { toolCallId: call.id, result: output };
+  });
+
+  // Add tool results to the conversation as tool messages.
+  for (const toolResult of toolResults) {
+    history.push({
+      role: "tool",
+      content: toolResult.result,
+    });
+  }
+
+  // Ask the model to continue now that the tool results are in the history.
+  console.log("\n\n🤖 Follow-up Response with Tool Results:");
+  const followUpResult = completion({
+    modelId,
+    history,
+    stream: true,
+    kvCache,
+    tools,
+  });
+
+  // Stream tokens as they arrive; only afterwards await the full text, so the
+  // output is not held back until generation has finished.
+  for await (const token of followUpResult.tokenStream) {
+    process.stdout.write(token);
+  }
+
+  history.push({
+    role: "assistant",
+    content: await followUpResult.text,
+  });
+
+  const followUpStats = await followUpResult.stats;
+  console.log("\n\n📊 Follow-up Stats:", followUpStats);
+}
+
+/** Parameters for the demo run: the KV-cache setting and one tool set per chat turn. */
+type ToolInvocationParam = Pick<CompletionParams, "kvCache"> & {
+  toolVariants: [ToolInput[], ToolInput[]];
+};
+
+/**
+ * Loads the model with tools in dynamic mode and runs a two-turn conversation,
+ * offering `toolVariants[0]` on the first turn and `toolVariants[1]` on the second.
+ *
+ * Any error is logged and terminates the process with exit code 1.
+ *
+ * @param kvCache - KV-cache setting shared by both turns.
+ * @param toolVariants - Tool sets for the first and second turn, in that order.
+ */
+async function runToolInvocationTest({ kvCache, toolVariants }: ToolInvocationParam): Promise<void> {
+  try {
+    // Load the predefined model with tools support enabled in dynamic mode
+    const modelId = await loadModel({
+      modelSrc: QWEN3_1_7B_INST_Q4,
+      modelType: "llm",
+      modelConfig: {
+        ctx_size: 4096,
+        tools: true, // Enable tools support
+        toolsMode: "dynamic",
+      },
+      onProgress: (progress) =>
+        console.log(`Loading: ${progress.percentage.toFixed(1)}%`),
+    });
+    console.log(`✅ Model loaded successfully! Model ID: ${modelId}`);
+
+    // Create conversation history; chatSession appends to it in place
+    const history = [
+      {
+        role: "system",
+        content: "You are a helpful assistant that can use tools.",
+      },
+      {
+        role: "user",
+        content: "What's the weather in Tokyo?",
+      },
+    ];
+
+    console.log("\n🤖 AI Response:");
+    console.log("(Streaming with tool definitions in prompt)\n");
+
+    // First turn: only the weather tool is offered
+    await chatSession({ modelId, history, tools: toolVariants[0], kvCache });
+
+    history.push({
+      role: "user",
+      content: "only in case the weather in Tokyo is good, check my horoscope for Aquarius; if the weather is bad - check Taurus; need only one horoscope depending on the weather",
+    });
+
+    // Second turn: same conversation and kvCache, but a different tool set
+    await chatSession({ modelId, history, tools: toolVariants[1], kvCache });
+
+    console.log("\n\n🎉 Completed!");
+    await unloadModel({ modelId, clearStorage: false });
+  } catch (error) {
+    console.error("❌ Error:", error);
+    process.exit(1);
+  }
+}
+
+// using same kvCache for a single session
+await runToolInvocationTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2] });
@@ -122,7 +122,7 @@
     "@qvac/embed-llamacpp": "^0.12.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text-cld2": "0.3.0",
-    "@qvac/llm-llamacpp": "^0.12.1",
+    "@qvac/llm-llamacpp": "^0.14.0",
     "@qvac/logging": "^0.1.0",
     "@qvac/ocr-onnx": "^0.2.0",
     "@qvac/rag": "^0.4.2",

@@ -41,6 +41,7 @@ export const llmConfigBaseSchema = z.object({
   stop_sequences: z.array(z.string()).optional(),
   n_discarded: z.number().optional(),
   tools: z.boolean().optional(),
+  toolsMode: z.enum(["static", "dynamic"]).optional(),
   projectionModelSrc: modelSrcInputSchema.optional(),
 });
 
@@ -52,6 +53,7 @@ export const LLM_CONFIG_DEFAULTS = {
   gpu_layers: 99,
   device: "gpu",
   system_prompt: "You are a helpful assistant.",
+  toolsMode: "static",
 } as const satisfies Partial<LlmConfigInput>;
 
 // Full schema - applies defaults via transform (no duplication)

@@ -31,7 +31,8 @@ import {
 } from "@/server/bare/registry/model-registry";
 import {
   checkForToolEvents,
-  insertToolsIntoHistory,
+  appendToolsToHistory,
+  prependToolsToHistory,
   setupToolGrammar,
 } from "@/server/utils/tool-integration";
 import { parseToolCalls } from "@/server/utils/tool-parser";
@@ -161,13 +162,16 @@ function prepareMessagesForCache(
     content: string;
     attachments?: { path: string }[] | undefined;
   }[],
+  tools?: Tool[],
 ): ChatHistory[] {
+  const addTools = tools?.length ? transformMessages(tools) : [];
   if (cacheExists && history.length > 0) {
     const lastMessage = history[history.length - 1];
     const lastTransformedMessages = transformMessage(lastMessage!);
     return [
       { role: "session", content: cachePathToUse },
       ...lastTransformedMessages,
+      ...addTools,
     ];
   }
 
@@ -178,6 +182,7 @@ function prepareMessagesForCache(
   return [
     { role: "session", content: cachePathToUse },
     ...transformedHistoryWithoutSystem,
+    ...addTools,
   ];
 }
 
@@ -257,6 +262,7 @@ export async function* completion(
 
   const modelConfig = getModelConfig(modelId);
   const toolsEnabled = (modelConfig as { tools?: boolean }).tools === true;
+  const toolsMode = (modelConfig as { toolsMode?: string }).toolsMode;
 
   let historyWithTools: Array<
     | {
@@ -268,7 +274,11 @@ export async function* completion(
   > = history;
 
   if (tools && tools.length > 0 && toolsEnabled) {
-    historyWithTools = insertToolsIntoHistory(history, tools);
+    if (toolsMode === "dynamic") {
+      historyWithTools = appendToolsToHistory(history, tools);
+    } else {
+      historyWithTools = prependToolsToHistory(history, tools);
+    }
     setupToolGrammar(modelConfig as Record<string, unknown>, tools);
   }
 
@@ -278,7 +288,13 @@ export async function* completion(
   if (kvCache) {
     const modelConfig = getModelConfig(modelId);
     const systemPromptFromHistory = extractSystemPrompt(history);
-    const configHash = generateConfigHash(systemPromptFromHistory, tools);
+    const toolsModeForHash = (modelConfig as { toolsMode?: string }).toolsMode;
+    const systemTools = toolsMode !== "dynamic" && tools?.length && toolsEnabled;
+    const dynamicTools = toolsMode === "dynamic" && tools?.length && toolsEnabled;
+    const configHash = generateConfigHash(
+      systemPromptFromHistory,
+      toolsModeForHash !== "dynamic" ? tools : undefined,
+    );
 
     const systemPromptToUse =
       systemPromptFromHistory ||
@@ -298,7 +314,7 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           kvCache,
-          tools && toolsEnabled ? tools : undefined,
+          systemTools ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, kvCache);
         cacheExists = true;
@@ -308,6 +324,7 @@ export async function* completion(
         cachePathToUse,
         cacheExists,
         history,
+        dynamicTools ? tools : undefined,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 
@@ -345,7 +362,7 @@ export async function* completion(
           cachePathToUse,
           systemPromptToUse,
           "auto",
-          tools && toolsEnabled ? tools : undefined,
+          systemTools ? tools : undefined,
         );
         markCacheInitialized(modelId, configHash, currentCacheInfo.cacheKey);
         cacheExists = true;
@@ -355,6 +372,7 @@ export async function* completion(
         cachePathToUse,
         cacheExists,
         history,
+        dynamicTools ? tools : undefined,
       );
       logMessagesToAddon(messagesToSend, "PROMPT_SEND");
 

@@ -50,6 +50,11 @@ function transformLlmConfig(llmConfig: LlmConfig) {
     delete transformed["stop_sequences"];
   }
 
+  if ("tools_mode" in transformed) {
+    transformed["tools_at_end"] = transformed["tools_mode"] === "dynamic" ? "true" : "false";
+    delete transformed["tools_mode"];
+  }
+
   return transformed;
 }