-
Notifications
You must be signed in to change notification settings - Fork 72
QVAC-13559 feat[api]: sdk "dynamic" tools mode #745
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
5094f21
5f5ce14
a26efdd
a5f3d06
2a10f45
0d3a650
d4d5a9f
94f57ba
a3e4059
59aba04
b8c7136
7eb0828
8785d46
a720a33
8fa7156
06e97a4
15c4491
f0ac901
474e6e6
b1a08b7
c01b702
8b11f50
a0cf11a
2baa19e
858819b
2a7a5d3
6c2344d
844786e
4fe44a4
a8fd16a
09d9e1d
2ca2ce6
4576665
de097f8
e1e9d04
2840f4d
3984f17
c5590a1
b30e062
4353e4f
a533c11
62931a3
ba5050a
c970b30
a3d6efd
ce32c9f
a55fc18
a489553
06d5e0f
1667ce0
514f13a
710fb87
7cb6483
d7c5c33
61dccf4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,208 @@ | ||
| import { z } from "zod"; | ||
| import { | ||
| completion, | ||
| loadModel, | ||
| unloadModel, | ||
| type ToolInput, | ||
| type ToolCall, | ||
| type CompletionStats, | ||
| type CompletionParams, | ||
| QWEN3_1_7B_INST_Q4, | ||
| } from "@qvac/sdk"; | ||
|
|
||
| // Define Zod schemas for tool parameters | ||
| const weatherSchema = z.object({ | ||
| city: z.string().describe("City name"), | ||
| }); | ||
|
|
||
| const horoscopeSchema = z.object({ | ||
| sign: z.string().describe("An astrological sign like Taurus or Aquarius"), | ||
| }); | ||
|
|
||
| // Map tool names to their schemas for runtime validation | ||
| const toolSchemas = { | ||
| get_weather: weatherSchema, | ||
| get_horoscope: horoscopeSchema, | ||
| }; | ||
|
|
||
| // Simple tool definitions - just name, description, and Zod schema! | ||
| const tools1 = [ | ||
| { | ||
| name: "get_weather", | ||
| description: "Get current weather for a city", | ||
| parameters: weatherSchema, | ||
| }, | ||
| ]; | ||
|
|
||
| const tools2 = [ | ||
| { | ||
| name: "get_horoscope", | ||
| description: "Get today's horoscope for an astrological sign", | ||
| parameters: horoscopeSchema, | ||
| }, | ||
| ]; | ||
|
|
||
| type ChatSesssionParam = CompletionParams & { | ||
| tools: ToolInput[] | ||
| } | ||
| async function chatSession ({ modelId, history, tools, kvCache }: ChatSesssionParam) { | ||
| const result = completion({ modelId, history, kvCache, stream: true, tools }); | ||
|
|
||
| // Consume token stream | ||
| const tokensTask = (async () => { | ||
| for await (const token of result.tokenStream) { | ||
| process.stdout.write(token); | ||
| } | ||
| })(); | ||
|
|
||
| // Consume tool call events | ||
| const toolsTask = (async () => { | ||
| for await (const evt of result.toolCallStream) { | ||
| if (evt.type === "toolCall") { | ||
| console.log( | ||
| `\n\n→ Tool Call Detected: ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`, | ||
| ); | ||
| console.log(` ID: ${evt.call.id}`); | ||
| } else if (evt.type === "toolCallError") { | ||
| console.warn(`\n⚠️ Tool Error: ${evt.error.message}`); | ||
| console.warn(` Code: ${evt.error.code}`); | ||
| } | ||
| } | ||
| })(); | ||
|
|
||
| await Promise.all([tokensTask, toolsTask]); | ||
|
|
||
| const stats: CompletionStats | undefined = await result.stats; | ||
| const toolCalls: ToolCall[] = await result.toolCalls; | ||
|
|
||
| console.log("\n\n📋 Parsed Tool Calls:"); | ||
| if (toolCalls.length > 0) { | ||
| for (const call of toolCalls) { | ||
| console.log(` - ${call.name}(${JSON.stringify(call.arguments)})`); | ||
|
|
||
| const schema = toolSchemas[call.name as keyof typeof toolSchemas]; | ||
| if (schema) { | ||
| const validated = schema.safeParse(call.arguments); | ||
| if (validated.success) { | ||
| console.log(` ✓ Arguments validated with Zod`); | ||
| } else { | ||
| console.log(` ✗ Validation failed:`, validated.error); | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
| console.log(" No tool calls detected in response"); | ||
| } | ||
|
|
||
| console.log("\n📊 Performance Stats:", stats); | ||
|
|
||
| // Execute tool calls and send results back to the model | ||
| if (toolCalls.length > 0) { | ||
| console.log("\n\n🔧 Simulating Tool Execution..."); | ||
|
|
||
| // Simulate tool execution (in a real app, you'd call actual APIs) | ||
| const toolResults = toolCalls.map((call) => { | ||
| let result = ""; | ||
| if (call.name === "get_weather") { | ||
| const args = call.arguments as { city: string; country?: string }; | ||
| result = `The weather in ${args.city} is sunny, 22°C with light clouds.`; | ||
| } else if (call.name === "get_horoscope") { | ||
| const args = call.arguments as { sign: string }; | ||
| result = `Horoscope for ${args.sign}: Today is a great day for new beginnings and creative endeavors!`; | ||
| } | ||
| console.log(` ✓ ${call.name}: ${result}`); | ||
| return { toolCallId: call.id, result }; | ||
| }); | ||
|
|
||
| // Add tool results to conversation history | ||
| history.push({ | ||
| role: "assistant", | ||
| content: await result.text, | ||
| }); | ||
|
|
||
| // Add tool results as tool messages | ||
| for (const toolResult of toolResults) { | ||
| history.push({ | ||
| role: "tool", | ||
| content: toolResult.result, | ||
| }); | ||
| } | ||
| } | ||
|
|
||
| // Send follow-up question with tool results | ||
| console.log("\n\n🤖 Follow-up Response with Tool Results:"); | ||
| const followUpResult = completion({ | ||
| modelId, | ||
| history, | ||
| stream: true, | ||
| kvCache, | ||
| tools, | ||
| }); | ||
|
|
||
| history.push({ | ||
| role: "assistant", | ||
| content: await followUpResult.text, | ||
| }); | ||
|
|
||
| for await (const token of followUpResult.tokenStream) { | ||
| process.stdout.write(token); | ||
| } | ||
|
|
||
|
|
||
| const followUpStats = await followUpResult.stats; | ||
| console.log("\n\n📊 Follow-up Stats:", followUpStats); | ||
| } | ||
|
|
||
| type ToolInvocationParam = Pick<CompletionParams, 'kvCache'> & { | ||
| toolVariants: [ToolInput[], ToolInput[]] | ||
| } | ||
| async function runToolInvocationTest({ kvCache, toolVariants }: ToolInvocationParam) { | ||
| try { | ||
| // Load model from provided file path with tools support enabled | ||
| const modelId = await loadModel({ | ||
| modelSrc: QWEN3_1_7B_INST_Q4, | ||
| modelType: "llm", | ||
| modelConfig: { | ||
| ctx_size: 4096, | ||
| tools: true, // Enable tools support | ||
| toolsMode: 'dynamic', | ||
| }, | ||
| onProgress: (progress) => | ||
| console.log(`Loading: ${progress.percentage.toFixed(1)}%`), | ||
| }); | ||
| console.log(`✅ Model loaded successfully! Model ID: ${modelId}`); | ||
|
|
||
| // Create conversation history | ||
| const history = [ | ||
| { | ||
| role: "system", | ||
| content:"You are a helpful assistant that can use tools.", | ||
| }, | ||
| { | ||
| role: "user", | ||
| content: "What's the weather in Tokyo?", | ||
| }, | ||
| ]; | ||
|
|
||
| console.log("\n🤖 AI Response:"); | ||
| console.log("(Streaming with tool definitions in prompt)\n"); | ||
|
|
||
| await chatSession({ modelId, history, tools: toolVariants[0], kvCache }) | ||
|
|
||
| history.push({ | ||
| role: "user", | ||
| content: "only in case the weather in Tokyo is good, check my horoscope for Aquarius; if the weather is bad - check Taurus; need only one horoscope depending on the whether", | ||
| }) | ||
|
|
||
| await chatSession({ modelId, history, tools: toolVariants[1], kvCache }) | ||
|
|
||
|
|
||
| console.log("\n\n🎉 Completed!"); | ||
| await unloadModel({ modelId, clearStorage: false }); | ||
| } catch (error) { | ||
| console.error("❌ Error:", error); | ||
| process.exit(1); | ||
| } | ||
| } | ||
| // using same kvCache for a single session | ||
| await runToolInvocationTest({ kvCache: `id-${Date.now()}`, toolVariants: [tools1, tools2] }) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,7 +31,8 @@ import { | |
| } from "@/server/bare/registry/model-registry"; | ||
| import { | ||
| checkForToolEvents, | ||
| insertToolsIntoHistory, | ||
| appendToolsToHistory, | ||
| prependToolsToHistory, | ||
| setupToolGrammar, | ||
| } from "@/server/utils/tool-integration"; | ||
| import { parseToolCalls } from "@/server/utils/tool-parser"; | ||
|
|
@@ -161,13 +162,16 @@ function prepareMessagesForCache( | |
| content: string; | ||
| attachments?: { path: string }[] | undefined; | ||
| }[], | ||
| tools?: Tool[], | ||
| ): ChatHistory[] { | ||
| const addTools = tools?.length ? transformMessages(tools) : []; | ||
| if (cacheExists && history.length > 0) { | ||
| const lastMessage = history[history.length - 1]; | ||
| const lastTransformedMessages = transformMessage(lastMessage!); | ||
| return [ | ||
| { role: "session", content: cachePathToUse }, | ||
| ...lastTransformedMessages, | ||
| ...addTools, | ||
| ]; | ||
| } | ||
|
|
||
|
|
@@ -178,6 +182,7 @@ function prepareMessagesForCache( | |
| return [ | ||
| { role: "session", content: cachePathToUse }, | ||
| ...transformedHistoryWithoutSystem, | ||
| ...addTools, | ||
| ]; | ||
| } | ||
|
|
||
|
|
@@ -257,6 +262,7 @@ export async function* completion( | |
|
|
||
| const modelConfig = getModelConfig(modelId); | ||
| const toolsEnabled = (modelConfig as { tools?: boolean }).tools === true; | ||
| const toolsMode = (modelConfig as { toolsMode?: string }).toolsMode; | ||
|
|
||
| let historyWithTools: Array< | ||
| | { | ||
|
|
@@ -268,7 +274,11 @@ export async function* completion( | |
| > = history; | ||
|
|
||
| if (tools && tools.length > 0 && toolsEnabled) { | ||
| historyWithTools = insertToolsIntoHistory(history, tools); | ||
| if (toolsMode === "dynamic") { | ||
| historyWithTools = appendToolsToHistory(history, tools); | ||
| } else { | ||
| historyWithTools = prependToolsToHistory(history, tools); | ||
| } | ||
|
lauripiisang marked this conversation as resolved.
Outdated
|
||
| setupToolGrammar(modelConfig as Record<string, unknown>, tools); | ||
| } | ||
|
|
||
|
|
@@ -278,7 +288,13 @@ export async function* completion( | |
| if (kvCache) { | ||
| const modelConfig = getModelConfig(modelId); | ||
| const systemPromptFromHistory = extractSystemPrompt(history); | ||
| const configHash = generateConfigHash(systemPromptFromHistory, tools); | ||
| const toolsModeForHash = (modelConfig as { toolsMode?: string }).toolsMode; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't this the same as the `toolsMode` declared above?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @simon-iribarren yes, but I've decided to follow the current logic, since there are 2 model configs right now and the kvCache "branch" has its own, like:
const modelConfig = getModelConfig(modelId);
// <...>
if (kvCache) {
const modelConfig = getModelConfig(modelId);
// at this point using toolsMode from the "second" config
Hence this is just for consistency - otherwise we should probably refactor to have a single `modelConfig` |
||
| const systemTools = toolsMode !== "dynamic" && tools?.length && toolsEnabled; | ||
| const dynamicTools = toolsMode === "dynamic" && tools?.length && toolsEnabled; | ||
| const configHash = generateConfigHash( | ||
| systemPromptFromHistory, | ||
| toolsModeForHash !== "dynamic" ? tools : undefined, | ||
| ); | ||
|
|
||
| const systemPromptToUse = | ||
| systemPromptFromHistory || | ||
|
|
@@ -298,7 +314,7 @@ export async function* completion( | |
| cachePathToUse, | ||
| systemPromptToUse, | ||
| kvCache, | ||
| tools && toolsEnabled ? tools : undefined, | ||
| systemTools ? tools : undefined, | ||
| ); | ||
| markCacheInitialized(modelId, configHash, kvCache); | ||
| cacheExists = true; | ||
|
|
@@ -308,6 +324,7 @@ export async function* completion( | |
| cachePathToUse, | ||
| cacheExists, | ||
| history, | ||
| dynamicTools ? tools : undefined, | ||
| ); | ||
| logMessagesToAddon(messagesToSend, "PROMPT_SEND"); | ||
|
|
||
|
|
@@ -345,7 +362,7 @@ export async function* completion( | |
| cachePathToUse, | ||
| systemPromptToUse, | ||
| "auto", | ||
| tools && toolsEnabled ? tools : undefined, | ||
| systemTools ? tools : undefined, | ||
| ); | ||
| markCacheInitialized(modelId, configHash, currentCacheInfo.cacheKey); | ||
| cacheExists = true; | ||
|
|
@@ -355,6 +372,7 @@ export async function* completion( | |
| cachePathToUse, | ||
| cacheExists, | ||
| history, | ||
| dynamicTools ? tools : undefined, | ||
| ); | ||
| logMessagesToAddon(messagesToSend, "PROMPT_SEND"); | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
need to bump to 0.13.0, to use the same version of llama.cpp as the llm-addon
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gianni-cor but dynamic tools feature PR has a bump to 0.14.0 🤔
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh, ok, sorry, I mixed up the packages - upgraded
"@qvac/embed-llamacpp": "^0.13.0",✔️