diff --git a/packages/cli/src/serve/adapters/openai/translate.ts b/packages/cli/src/serve/adapters/openai/translate.ts index 74d910b8eb..618944ca68 100644 --- a/packages/cli/src/serve/adapters/openai/translate.ts +++ b/packages/cli/src/serve/adapters/openai/translate.ts @@ -146,6 +146,8 @@ export function extractGenerationParams (body: Record): SDKGene if (typeof body['max_tokens'] === 'number') params.predict = body['max_tokens'] if (typeof body['max_completion_tokens'] === 'number') params.predict = body['max_completion_tokens'] + if (typeof body['reasoning_budget'] === 'boolean') params.reasoning_budget = body['reasoning_budget'] + return Object.keys(params).length > 0 ? params : undefined } diff --git a/packages/cli/src/serve/core/sdk.ts b/packages/cli/src/serve/core/sdk.ts index b6db7823a8..2327df9e99 100644 --- a/packages/cli/src/serve/core/sdk.ts +++ b/packages/cli/src/serve/core/sdk.ts @@ -11,6 +11,7 @@ export interface SDKGenerationParams { frequency_penalty?: number presence_penalty?: number repeat_penalty?: number + reasoning_budget?: boolean } export type SDKResponseFormat = @@ -162,7 +163,12 @@ export async function sdkCompletion (opts: { params['tools'] = opts.tools } if (opts.generationParams) { - params['generationParams'] = opts.generationParams + const { reasoning_budget, ...rest } = opts.generationParams + const sdkGenParams: Record = { ...rest } + if (reasoning_budget !== undefined) { + sdkGenParams['reasoning_budget'] = reasoning_budget ? -1 : 0 + } + params['generationParams'] = sdkGenParams } if (opts.responseFormat) { params['responseFormat'] = opts.responseFormat diff --git a/packages/cli/test/translate.test.ts b/packages/cli/test/translate.test.ts index 5a1c1f8bcb..eb374a624a 100644 --- a/packages/cli/test/translate.test.ts +++ b/packages/cli/test/translate.test.ts @@ -344,6 +344,23 @@ describe('extractGenerationParams', () => { assert.equal(params.presence_penalty, 0.1) }) + it('extracts reasoning_budget true', () => { + const params = extractGenerationParams({ reasoning_budget: true }) + assert.ok(params) + assert.equal(params.reasoning_budget, true) + }) + + it('extracts reasoning_budget false', () => { + const params = extractGenerationParams({ reasoning_budget: false }) + assert.ok(params) + assert.equal(params.reasoning_budget, false) + }) + + it('ignores non-boolean reasoning_budget', () => { + const params = extractGenerationParams({ reasoning_budget: -1 }) + assert.equal(params, undefined) + }) + it('ignores non-number values', () => { const params = extractGenerationParams({ temperature: 'hot', max_tokens: '100' }) assert.equal(params, undefined) diff --git a/packages/sdk/client/api/completion-stream.ts b/packages/sdk/client/api/completion-stream.ts index 18a52f0f26..0fe0ef0c1a 100644 --- a/packages/sdk/client/api/completion-stream.ts +++ b/packages/sdk/client/api/completion-stream.ts @@ -54,7 +54,7 @@ type CompletionParams = Omit & { * @param params.mcp - Optional array of MCP client inputs for tool integration * @param params.captureThinking - Best-effort parsing of `` blocks into `thinkingDelta` events; `final.raw.fullText` always preserves the original output * @param params.emitRawDeltas - When true, every raw model token is also emitted as a `rawDelta` event - * @param params.toolDialect - Override the SDK's name-based dialect detection. Use when your model emits a known format (`"hermes"`, `"pythonic"`, or `"json"`) the auto-router doesn't recognise. Drives both streaming frame detection and finalization parsing. + * @param params.toolDialect - Override the SDK's name-based dialect detection. Supported values: `"hermes"`, `"pythonic"`, `"json"`, `"harmony"`, `"qwen35"` (Qwen3.5/3.6), `"gemma4"`. Use when the auto-router doesn't recognise your model name. Drives both streaming frame detection and finalization parsing. * Common override case: Llama 3.x tool-calling fine-tunes that emit the native pythonic header (`<|start_header_id|>tool_call<|end_header_id|>...<|eot_id|>`). * @param params.responseFormat - Optional structured-output constraint applied to the model's output: * - `{ type: "text" }` — no constraint (default behavior) diff --git a/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts new file mode 100644 index 0000000000..d9aa1427ab --- /dev/null +++ b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts @@ -0,0 +1,94 @@ +/** + * Tool-calling example using the Gemma4 native dialect. + * + * Gemma4 emits tool calls in a JS-literal format with custom quote tokens: + * <|tool_call>call:NAME{key:<|"|>val<|"|>,...} + * + * Reasoning output (thinking) is emitted inside <|channel>thought... + * frames, which are stripped from contentDelta and forwarded as thinkingDelta + * when captureThinking is true. + * + * The dialect is auto-detected from the model name/path when the file name + * contains "gemma4" or "gemma-4". Pass toolDialect: "gemma4" explicitly to + * completion() if auto-detection does not pick it up for a given file name. + * + * Usage: + * bun run bare:example dist/examples/tools/llamacpp-tools-gemma4.js + */ +import { + completion, + loadModel, + unloadModel, + type ToolCall, +} from "@qvac/sdk"; +import { tools, mockExecute } from "./shared"; + +// bartowski's pack tags as the EOG token (matching the base tokenizer), +// ensuring generation stops correctly; unsloth's variant maps it differently. +const GEMMA4_HF = + "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf"; + +const modelSrc = process.argv[2] ?? GEMMA4_HF; + +let modelId: string | undefined; +try { + modelId = await loadModel({ + modelSrc, + modelType: "llm", + modelConfig: { ctx_size: 4096, tools: true }, + onProgress: (progress) => + console.log(`Loading: ${progress.percentage.toFixed(1)}%`), + }); + console.log(`Model loaded: ${modelId}`); + + const history = [ + { + role: "system", + content: + "You are a helpful assistant that can call tools to look up weather and horoscopes.", + }, + { + role: "user", + content: "What's the weather in Tokyo and my horoscope for Aquarius?", + }, + ]; + + const result = completion({ modelId, history, stream: true, tools }); + + const tokensTask = (async () => { + for await (const token of result.tokenStream) { + process.stdout.write(token); + } + })(); + + const toolsTask = (async () => { + for await (const evt of result.toolCallStream) { + if (evt.type === "toolCall") { + console.log( + `\n-> ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`, + ); + } + } + })(); + + await Promise.all([tokensTask, toolsTask]); + + const toolCalls: ToolCall[] = await result.toolCalls; + + console.log("\n\nFinal tool calls:"); + if (toolCalls.length > 0) { + for (const call of toolCalls) { + console.log(` - ${call.name}(${JSON.stringify(call.arguments)})`); + const toolResult = mockExecute(call.name, call.arguments); + console.log(` result: ${toolResult}`); + } + } else { + console.log(" (none)"); + } + + await unloadModel({ modelId, clearStorage: false }); +} catch (error) { + console.error("Error:", error); + if (modelId) await unloadModel({ modelId, clearStorage: false }).catch(() => {}); + process.exit(1); +} diff --git a/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts new file mode 100644 index 0000000000..7d8c571454 --- /dev/null +++ b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts @@ -0,0 +1,88 @@ +/** + * Tool-calling example using the Qwen3.5 dialect. + * + * Qwen3.5 emits tool calls in a Pythonic-XML format: + * VALUE + * + * The dialect is auto-detected from the model name/path when the model file + * contains "qwen3.5", "qwen3-5", "qwen3.6", or "qwen3-6". Pass + * toolDialect: "qwen35" explicitly if auto-detection does not pick it up. + * + * Usage: + * bun run bare:example dist/examples/tools/llamacpp-tools-qwen35.js + */ +import { + completion, + loadModel, + unloadModel, + type ToolCall, +} from "@qvac/sdk"; +import { tools, mockExecute } from "./shared"; + +const QWEN35_HF = + "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q8_0.gguf"; + +const modelSrc = process.argv[2] ?? QWEN35_HF; + +let modelId: string | undefined; +try { + modelId = await loadModel({ + modelSrc, + modelType: "llm", + modelConfig: { ctx_size: 4096, tools: true }, + onProgress: (progress) => + console.log(`Loading: ${progress.percentage.toFixed(1)}%`), + }); + console.log(`Model loaded: ${modelId}`); + + const history = [ + { + role: "system", + content: + "You are a helpful assistant that can call tools to look up weather and horoscopes.", + }, + { + role: "user", + content: "What's the weather in Tokyo and my horoscope for Aquarius?", + }, + ]; + + const result = completion({ modelId, history, stream: true, tools }); + + const tokensTask = (async () => { + for await (const token of result.tokenStream) { + process.stdout.write(token); + } + })(); + + const toolsTask = (async () => { + for await (const evt of result.toolCallStream) { + if (evt.type === "toolCall") { + console.log( + `\n-> ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`, + ); + } + } + })(); + + await Promise.all([tokensTask, toolsTask]); + + const toolCalls: ToolCall[] = await result.toolCalls; + + console.log("\n\nFinal tool calls:"); + if (toolCalls.length > 0) { + for (const call of toolCalls) { + console.log(` - ${call.name}(${JSON.stringify(call.arguments)})`); + const toolResult = mockExecute(call.name, call.arguments); + console.log(` result: ${toolResult}`); + } + } else { + console.log(" (none)"); + } + + await unloadModel({ modelId, clearStorage: false }); +} catch (error) { + console.error("Error:", error); + if (modelId) await unloadModel({ modelId, clearStorage: false }).catch(() => {}); + process.exit(1); +} diff --git a/packages/sdk/package.json b/packages/sdk/package.json index 3eb64d986c..5aab878501 100644 --- a/packages/sdk/package.json +++ b/packages/sdk/package.json @@ -177,7 +177,7 @@ "@qvac/embed-llamacpp": "^0.15.0", "@qvac/error": "^0.1.1", "@qvac/langdetect-text": "^0.1.2", - "@qvac/llm-llamacpp": "^0.18.0", + "@qvac/llm-llamacpp": "^0.20.0", "@qvac/logging": "^0.1.0", "@qvac/ocr-onnx": "^0.4.2", "@qvac/rag": "^0.4.4", diff --git a/packages/sdk/schemas/completion-stream.ts b/packages/sdk/schemas/completion-stream.ts index e3edc9f1a1..4e75f38b08 100644 --- a/packages/sdk/schemas/completion-stream.ts +++ b/packages/sdk/schemas/completion-stream.ts @@ -16,12 +16,16 @@ export { * - `"pythonic"`: `[get_weather(city="Tokyo")]` (optionally `<|tool_call_start|>...<|tool_call_end|>`-wrapped) * - `"json"`: `{"name":"get_weather","arguments":{"city":"Tokyo"}}` or `{"tool_calls":[{"name":"...","arguments":{...}}]}` * - `"harmony"`: `<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"city":"Tokyo"}<|call|>` + * - `"qwen35"`: `VALUE` + * - `"gemma4"`: `<|tool_call>call:NAME{key:<|"|>val<|"|>,...}` */ export const toolDialectSchema = z.enum([ "hermes", "pythonic", "json", "harmony", + "qwen35", + "gemma4" ]); export const attachmentSchema = z.object({ @@ -70,6 +74,12 @@ export const generationParamsSchema = z .number() .optional() .describe("Penalty applied to repeated tokens."), + reasoning_budget: z + .union([z.literal(-1), z.literal(0)]) + .optional() + .describe( + "Per-request reasoning channel budget. `-1` keeps the model's reasoning channel on; `0` disables it for this request. Equivalent to the load-time `reasoning_budget` config but scoped to a single `run()` call; the prior value is restored afterwards.", + ), }) .strict(); diff --git a/packages/sdk/schemas/llamacpp-config.ts b/packages/sdk/schemas/llamacpp-config.ts index f4f50eabad..9cd70e6877 100644 --- a/packages/sdk/schemas/llamacpp-config.ts +++ b/packages/sdk/schemas/llamacpp-config.ts @@ -33,6 +33,7 @@ export const llmConfigBaseSchema = z.object({ z.number().int().min(1), // positive integer: fixed token count ]) .optional(), + /** JS-side only: seeds conversation history. Never forwarded to the C++ addon. */ system_prompt: z.string().optional(), no_mmap: z.boolean().optional(), verbosity: verbositySchema.optional(), @@ -60,6 +61,10 @@ export const llmConfigBaseSchema = z.object({ * for fast GPU startup. */ openclCacheDir: z.string().optional(), + /** + * Reasoning channel token budget. `-1` = unrestricted, `0` = disabled. + */ + reasoning_budget: z.union([z.literal(-1), z.literal(0)]).optional(), projectionModelSrc: modelSrcInputSchema.optional(), }); diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts index 89d36b7ea0..fd483e2411 100644 --- a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts +++ b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts @@ -12,7 +12,6 @@ import { ModelType, llmConfigBaseSchema, ADDON_LLM, - TOOLS_MODE, type CompletionEvent, type CreateModelParams, type PluginCapabilities, @@ -26,6 +25,7 @@ import { expandGGUFIntoShards } from "@/server/utils"; import { completion } from "@/server/bare/plugins/llamacpp-completion/ops/completion-stream"; import { finetune } from "@/server/bare/plugins/llamacpp-completion/ops/finetune"; import { translate } from "@/server/bare/ops/translate"; +import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform"; import { attachModelExecutionMs } from "@/profiling/model-execution"; import { getModelConfig } from "@/server/bare/registry/model-registry"; import { createCompletionNormalizer } from "@/server/utils/completion-normalizer"; @@ -33,44 +33,6 @@ import { detectToolDialect } from "@/server/utils/tool-integration"; import { getRequestRegistry } from "@/server/bare/runtime"; import { generateServerRequestId } from "@/server/bare/runtime/request-id"; -function transformLlmConfig(llmConfig: LlmConfig) { - const transformed = JSON.parse( - JSON.stringify(llmConfig, (key: string, v: unknown) => - key === "modelType" - ? undefined - : key === "stop_sequences" - ? Array.isArray(v) - ? v.join(", ") - : v - : typeof v === "number" || typeof v === "boolean" - ? String(v) - : v, - ).replace( - /"([a-z][A-Za-z]*)":/g, - (_, key: string) => - `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`, - ), - ) as Record; - - if ("stop_sequences" in transformed) { - transformed["reverse_prompt"] = transformed["stop_sequences"]; - delete transformed["stop_sequences"]; - } - - if ("opencl_cache_dir" in transformed) { - transformed["openclCacheDir"] = transformed["opencl_cache_dir"]; - delete transformed["opencl_cache_dir"]; - } - - if ("tools_mode" in transformed) { - if (transformed["tools_mode"] === TOOLS_MODE.dynamic) { - transformed["tools_compact"] = "true"; - } - delete transformed["tools_mode"]; - } - - return transformed; -} function createLlmModel( modelId: string, diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts new file mode 100644 index 0000000000..9c4dd358ca --- /dev/null +++ b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts @@ -0,0 +1,47 @@ +import { TOOLS_MODE, type LlmConfig } from "@/schemas"; + +/** + * Converts an LlmConfig into the flat string-keyed map the C++ addon expects. + * + * JS-only fields excluded from the output (must NOT be forwarded to the addon): + * - modelType (schema discriminant, meaningless at C++ level) + * - system_prompt (JS-side history seeding only; C++ removed --system-prompt in 8189) + */ +export function transformLlmConfig(llmConfig: LlmConfig) { + const transformed = JSON.parse( + JSON.stringify(llmConfig, (key: string, v: unknown) => + key === "modelType" || key === "system_prompt" + ? undefined + : key === "stop_sequences" + ? Array.isArray(v) + ? v.join(", ") + : v + : typeof v === "number" || typeof v === "boolean" + ? String(v) + : v, + ).replace( + /"([a-z][A-Za-z]*)":/g, + (_, key: string) => + `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`, + ), + ) as Record; + + if ("stop_sequences" in transformed) { + transformed["reverse_prompt"] = transformed["stop_sequences"]; + delete transformed["stop_sequences"]; + } + + if ("opencl_cache_dir" in transformed) { + transformed["openclCacheDir"] = transformed["opencl_cache_dir"]; + delete transformed["opencl_cache_dir"]; + } + + if ("tools_mode" in transformed) { + if (transformed["tools_mode"] === TOOLS_MODE.dynamic) { + transformed["tools_compact"] = "true"; + } + delete transformed["tools_mode"]; + } + + return transformed; +} diff --git a/packages/sdk/server/utils/completion-normalizer.ts b/packages/sdk/server/utils/completion-normalizer.ts index ccc1c782a8..a91b0fa319 100644 --- a/packages/sdk/server/utils/completion-normalizer.ts +++ b/packages/sdk/server/utils/completion-normalizer.ts @@ -71,6 +71,14 @@ const DIALECT_SPECS: Record = { "<|return|>", ], }, + qwen35: { + // Same framing as hermes; inner content is XML. + toolFrames: [{ open: "", close: "" }], + }, + gemma4: { + toolFrames: [{ open: "<|tool_call>", close: "" }], + thinkingFrames: [{ open: "<|channel>thought", close: "" }], + }, }; // Capture-gated reasoning marker — the generic `...` diff --git a/packages/sdk/server/utils/tools/dialect.ts b/packages/sdk/server/utils/tools/dialect.ts index 11bf614b53..bbbade9228 100644 --- a/packages/sdk/server/utils/tools/dialect.ts +++ b/packages/sdk/server/utils/tools/dialect.ts @@ -12,6 +12,8 @@ export function detectToolDialectFromName( const basename = path.toLowerCase().split(/[/\\]/).pop() ?? ""; const tag = `${(name ?? "").toLowerCase()}|${basename}`; + if (/qwen3[._-]?[56](?![a-z0-9])/.test(tag)) return "qwen35"; + if (/gemma[-_]?4(?=[^a-z0-9]|$)/.test(tag)) return "gemma4"; if (/gpt[_-]?oss/.test(tag)) return "harmony"; if (/lfm[_-]?\d/.test(tag)) return "pythonic"; return "hermes"; diff --git a/packages/sdk/server/utils/tools/parser.ts b/packages/sdk/server/utils/tools/parser.ts index 2a685593bf..065d6e3ce6 100644 --- a/packages/sdk/server/utils/tools/parser.ts +++ b/packages/sdk/server/utils/tools/parser.ts @@ -11,6 +11,8 @@ import { import { parseHermesFormat } from "@/server/utils/tools/parsers/hermes"; import { parsePythonicFormat } from "@/server/utils/tools/parsers/pythonic"; import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony"; +import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35"; +import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native"; function pickFormatParsers( dialect: ToolDialect | undefined, @@ -26,13 +28,26 @@ function pickFormatParsers( return [parseGemmaFormat, parseLlamacppFormat]; case "harmony": return [parseHarmonyFormat]; + case "qwen35": + // Hermes fallback: Qwen3.5 templates sometimes emit OpenAI-style JSON + // when the native XML format fails; Hermes chain recovers those. + return [parseQwen35Format, parseHermesFormat]; + case "gemma4": + // No JSON fallback: Gemma4 emits only its native channel-thought dialect + // and never falls back to JSON-envelope formats. + return [parseGemma4NativeFormat]; default: - // Harmony first: `to=functions.` is uniquely Harmony and can't + // Gemma4 first: `<|tool_call>` is uniquely distinctive and can't // false-match other dialects. + // Harmony next: `to=functions.` is also uniquely Harmony. + // Qwen35 before Hermes: defers to Hermes when JSON is inside , + // so the XML path is recovered without breaking Hermes-JSON payloads. // Pythonic last: its bare `[name(...)]` form can match payloads that // look like other dialects. return [ + parseGemma4NativeFormat, parseHarmonyFormat, + parseQwen35Format, parseHermesFormat, parseGemmaFormat, parseLlamacppFormat, diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts new file mode 100644 index 0000000000..6b3cc9215a --- /dev/null +++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts @@ -0,0 +1,75 @@ +import type { Tool, ToolCall, ToolCallError } from "@/schemas"; +import { + generateStableToolCallId, + validateToolArguments, + type ParserResult, +} from "@/server/utils/tools/shared"; + +// Transliterates Gemma 4's JS-literal argument body to valid JSON so it can +// be parsed with JSON.parse. The body uses: +// - <|"|>...<|"|> instead of "..." for string values +// - bare (unquoted) object keys +// Strategy: split by <|"|> tokens so structural parts (even indices) and +// string value parts (odd indices) are processed separately, preventing +// the key-quoting regex from matching `, key:` patterns inside string values. +function gemmaArgsToJson(argsRaw: string): string { + const parts = ("{" + argsRaw + "}").split(/<\|"\|>/); + return parts + .map((part, i) => + i % 2 === 0 + ? part.replace(/([{,]\s*)([A-Za-z_][\w-]*)\s*:/g, '$1"$2":') + : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/[\x00-\x1f]/g, (c) => `\\u${c.charCodeAt(0).toString(16).padStart(4, "0")}`) + '"', + ) + .join(""); +} + +// Parses Gemma 4's native tool-call dialect: +// <|tool_call>call:NAME{key:<|"|>val<|"|>,key2:bareNum,...} +// Keys are bare identifiers; string values are wrapped in <|"|>...<|"|>; +// numbers/booleans are bare literals; nested objects/arrays use JS-literal +// syntax throughout. Transliterates to JSON then parses. +export function parseGemma4NativeFormat(text: string, tools: Tool[]): ParserResult { + const toolCalls: ToolCall[] = []; + const errors: ToolCallError[] = []; + + if (!text.includes("<|tool_call>")) { + return { matched: false, toolCalls, errors }; + } + + const callRegex = /<\|tool_call>call:([A-Za-z_][\w-]*)\{([\s\S]*?)\}/g; + const matches = Array.from(text.matchAll(callRegex)); + + if (matches.length === 0) return { matched: false, toolCalls, errors }; + + for (const match of matches) { + const name = match[1]!; + const argsRaw = match[2]!; + + let args: Record; + try { + args = JSON.parse(gemmaArgsToJson(argsRaw)) as Record; + } catch (err) { + errors.push({ + code: "PARSE_ERROR", + message: `Failed to parse Gemma 4 tool call arguments: ${err instanceof Error ? err.message : String(err)}`, + raw: match[0], + }); + continue; + } + + const validation = validateToolArguments(name, args, tools); + if (!validation.isValid && validation.error) { + errors.push({ ...validation.error, raw: match[0] }); + continue; + } + + toolCalls.push({ + id: generateStableToolCallId(name, args), + name, + arguments: args, + raw: match[0], + }); + } + + return { matched: true, toolCalls, errors }; +} diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts new file mode 100644 index 0000000000..ac417e2e88 --- /dev/null +++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts @@ -0,0 +1,133 @@ +import type { Tool, ToolCall, ToolCallError } from "@/schemas"; +import { + generateStableToolCallId, + validateToolArguments, + type ParserResult, +} from "@/server/utils/tools/shared"; + +// Coerce raw parameter text to the type declared in the tool's JSON schema. +// String values are raw (not JSON-quoted); arrays/objects are valid JSON. +function coerceParamValue( + raw: string, + schema?: { type?: string }, +): unknown { + const trimmed = raw.trim(); + if (!schema?.type) return trimmed; + switch (schema.type) { + case "number": { + if (trimmed.length === 0) throw new Error(`invalid numeric value: ""`); + const n = Number(trimmed); + if (Number.isNaN(n)) throw new Error(`invalid numeric value: "${trimmed}"`); + return n; + } + case "integer": { + if (trimmed.length === 0) throw new Error(`invalid integer value: ""`); + const n = Number(trimmed); + if (Number.isNaN(n) || !Number.isInteger(n)) throw new Error(`invalid integer value: "${trimmed}"`); + return n; + } + case "boolean": + if (trimmed === "true") return true; + if (trimmed === "false") return false; + throw new Error(`invalid boolean value: "${trimmed}"`); + case "array": + case "object": + return JSON.parse(trimmed); + default: + return trimmed; + } +} + +// Parses Qwen3.5/3.6 Pythonic-XML tool-call format: +// +// +// VALUE +// +// +// String parameter values are raw text (not JSON-quoted); arrays/objects +// are JSON. Type coercion uses the tool schema; unknown params pass through. +export function parseQwen35Format(text: string, tools: Tool[]): ParserResult { + const toolCalls: ToolCall[] = []; + const errors: ToolCallError[] = []; + + if (!text.includes("")) { + return { matched: false, toolCalls, errors }; + } + + const outerRegex = /([\s\S]*?)<\/tool_call>/g; + const outerMatches = Array.from(text.matchAll(outerRegex)); + + if (outerMatches.length === 0) return { matched: false, toolCalls, errors }; + + // If no match contains XML function syntax, check if this is JSON format + // (defer to hermes) or just malformed content (surface as PARSE_ERROR). + if (!outerMatches.some((m) => m[1]!.includes(" { + const inner = m[1]!.trim(); + return inner.startsWith("{") || inner.startsWith("["); + }); + if (looksLikeJson) return { matched: false, toolCalls, errors }; + return { + matched: true, + toolCalls, + errors: outerMatches.map((m) => ({ + code: "PARSE_ERROR" as const, + message: "Qwen3.5 tool call missing ...", + raw: m[1]!.trim(), + })), + }; + } + + for (const outerMatch of outerMatches) { + const inner = outerMatch[1]!.trim(); + + const fnMatch = /\s]+)\s*>([\s\S]*?)<\/function>/i.exec( + inner, + ); + if (!fnMatch) { + errors.push({ + code: "PARSE_ERROR", + message: "Qwen3.5 tool call missing ...", + raw: inner, + }); + continue; + } + + const name = fnMatch[1]!.trim(); + const paramsBlock = fnMatch[2]!; + const tool = tools.find((t) => t.name === name); + const schemaProperties = tool?.parameters?.properties ?? {}; + + const args: Record = {}; + let parseError: string | undefined; + try { + const paramRegex = /\s]+)\s*>([\s\S]*?)<\/parameter>/gi; + let pm: RegExpExecArray | null; + while ((pm = paramRegex.exec(paramsBlock)) !== null) { + const paramName = pm[1]!.trim(); + args[paramName] = coerceParamValue(pm[2]!, schemaProperties[paramName]); + } + } catch (err) { + parseError = err instanceof Error ? err.message : String(err); + } + if (parseError !== undefined) { + errors.push({ code: "PARSE_ERROR", message: parseError, raw: inner }); + continue; + } + + const validation = validateToolArguments(name, args, tools); + if (!validation.isValid && validation.error) { + errors.push({ ...validation.error, raw: inner }); + continue; + } + + toolCalls.push({ + id: generateStableToolCallId(name, args), + name, + arguments: args, + raw: inner, + }); + } + + return { matched: true, toolCalls, errors }; +} diff --git a/packages/sdk/test/unit/completion-normalizer.test.ts b/packages/sdk/test/unit/completion-normalizer.test.ts index f92a8f9c48..f5b92f7575 100644 --- a/packages/sdk/test/unit/completion-normalizer.test.ts +++ b/packages/sdk/test/unit/completion-normalizer.test.ts @@ -770,3 +770,91 @@ test("harmony spec defined but hermes dialect still strips as before", ( t.alike(texts(events, "thinkingDelta"), ["thought"]); t.alike(texts(events, "contentDelta"), ["A", "B"]); }); + +test("qwen35 streaming: tool frame emits toolCall mid-stream", (t) => { + const n = createCompletionNormalizer( + baseConfig({ + capabilities: TEXT_PARSE_CAPS, + tools: [GET_WEATHER_TOOL], + toolDialect: "qwen35", + }), + ); + const text = `Paris`; + const events = [...pushAll(n, [text]), ...n.finish()]; + const toolEvents = events.filter((e) => e.type === "toolCall"); + t.is(toolEvents.length, 1, "qwen35 tool frame emits toolCall"); + t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather"); + t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Paris" }); + const contentJoined = texts(events, "contentDelta").join(""); + t.absent(contentJoined.includes(""), "open marker must not leak"); + t.absent(contentJoined.includes(""), "close marker must not leak"); +}); + +test("qwen35 streaming: marker split across pushes still detected", (t) => { + const n = createCompletionNormalizer( + baseConfig({ + capabilities: TEXT_PARSE_CAPS, + tools: [GET_WEATHER_TOOL], + toolDialect: "qwen35", + }), + ); + const events = pushAll(n, [ + "Lima", + ]); + const toolEvents = events.filter((e) => e.type === "toolCall"); + t.is(toolEvents.length, 1, "qwen35 frame detected across split marker"); +}); + +test("gemma4 streaming: tool frame emits toolCall mid-stream", (t) => { + const n = createCompletionNormalizer( + baseConfig({ + capabilities: TEXT_PARSE_CAPS, + tools: [GET_WEATHER_TOOL], + toolDialect: "gemma4", + }), + ); + const text = `<|tool_call>call:get_weather{city:<|"|>Tokyo<|"|>}`; + const events = [...pushAll(n, [text]), ...n.finish()]; + const toolEvents = events.filter((e) => e.type === "toolCall"); + t.is(toolEvents.length, 1, "gemma4 tool frame emits toolCall"); + t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather"); + t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Tokyo" }); + const contentJoined = texts(events, "contentDelta").join(""); + t.absent(contentJoined.includes("<|tool_call>"), "open marker must not leak"); + t.absent(contentJoined.includes(""), "close marker must not leak"); +}); + +test("gemma4 thought frame: inner emitted as thinkingDelta (captureThinking=true)", (t) => { + const n = createCompletionNormalizer( + baseConfig({ + capabilities: NONE_CAPS, + toolDialect: "gemma4", + captureThinking: true, + }), + ); + const text = `<|channel>thoughtthinking hereafter`; + const events = [...pushAll(n, [text]), ...n.finish()]; + t.alike(texts(events, "thinkingDelta"), ["thinking here"]); + t.alike(texts(events, "contentDelta"), ["after"]); + t.is(n.getAccumulated().thinkingText, "thinking here"); + t.is(n.getAccumulated().contentText, "after"); +}); + +test("gemma4 thought frame: silently dropped (captureThinking=false)", (t) => { + const n = createCompletionNormalizer( + baseConfig({ + capabilities: NONE_CAPS, + toolDialect: "gemma4", + captureThinking: false, + }), + ); + const text = `<|channel>thoughtthinking hereafter`; + const events = [...pushAll(n, [text]), ...n.finish()]; + t.absent(types(events).includes("thinkingDelta"), "no thinkingDelta when not captured"); + t.alike(texts(events, "contentDelta"), ["after"]); + const contentJoined = texts(events, "contentDelta").join(""); + t.absent(contentJoined.includes("<|channel>thought"), "open marker must not leak"); + t.absent(contentJoined.includes(""), "close marker must not leak"); + t.absent(contentJoined.includes("thinking here"), "thought inner must be dropped"); +}); diff --git a/packages/sdk/test/unit/completion-stream-schemas.test.ts b/packages/sdk/test/unit/completion-stream-schemas.test.ts index 704b339927..37dac6aa47 100644 --- a/packages/sdk/test/unit/completion-stream-schemas.test.ts +++ b/packages/sdk/test/unit/completion-stream-schemas.test.ts @@ -3,6 +3,8 @@ import test from "brittle"; import { completionStreamResponseSchema, completionStatsSchema, + generationParamsSchema, + toolDialectSchema, } from "@/schemas/completion-stream"; test("completionStatsSchema: accepts backendDevice 'cpu' and 'gpu'", (t) => { @@ -29,6 +31,25 @@ test("completionStatsSchema: backendDevice is optional", (t) => { t.is(result.success, true); }); +test("generationParamsSchema: accepts reasoning_budget -1 and 0", (t) => { + t.is(generationParamsSchema.safeParse({ reasoning_budget: -1 }).success, true); + t.is(generationParamsSchema.safeParse({ reasoning_budget: 0 }).success, true); +}); + +test("generationParamsSchema: rejects reasoning_budget other values", (t) => { + t.is(generationParamsSchema.safeParse({ reasoning_budget: 1 }).success, false); + t.is(generationParamsSchema.safeParse({ reasoning_budget: -2 }).success, false); +}); + +test("toolDialectSchema: accepts qwen35 and gemma4", (t) => { + t.is(toolDialectSchema.safeParse("qwen35").success, true); + t.is(toolDialectSchema.safeParse("gemma4").success, true); +}); + +test("toolDialectSchema: rejects unknown dialects", (t) => { + t.is(toolDialectSchema.safeParse("unknown").success, false); +}); + test("completionStreamResponseSchema: round-trips backendDevice through completionStats event", (t) => { const result = completionStreamResponseSchema.safeParse({ type: "completionStream", diff --git a/packages/sdk/test/unit/llm-config-schema.test.ts b/packages/sdk/test/unit/llm-config-schema.test.ts index 3f23eb9184..da2dd1bcb6 100644 --- a/packages/sdk/test/unit/llm-config-schema.test.ts +++ b/packages/sdk/test/unit/llm-config-schema.test.ts @@ -101,3 +101,17 @@ test("loadModelSrcRequestSchema: accepts split-mode for LLM", (t) => { }); t.is(result.success, true); }); + +test("llmConfigBaseSchema: accepts reasoning_budget -1 (unrestricted)", (t) => { + t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -1 }).success, true); +}); + +test("llmConfigBaseSchema: accepts reasoning_budget 0 (disabled)", (t) => { + t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0 }).success, true); +}); + +test("llmConfigBaseSchema: rejects reasoning_budget other values", (t) => { + t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 1 }).success, false); + t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -2 }).success, false); + t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0.5 }).success, false); +}); diff --git a/packages/sdk/test/unit/llm-plugin-transform.test.ts b/packages/sdk/test/unit/llm-plugin-transform.test.ts new file mode 100644 index 0000000000..f5b7f66ccb --- /dev/null +++ b/packages/sdk/test/unit/llm-plugin-transform.test.ts @@ -0,0 +1,55 @@ +// @ts-expect-error brittle has no type declarations +import test from "brittle"; +import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform"; +import { llmConfigSchema } from "@/schemas/llamacpp-config"; + +function makeConfig(overrides: Record = {}) { + return llmConfigSchema.parse(overrides); +} + +test("transformLlmConfig: system_prompt is never forwarded to C++", (t) => { + const config = makeConfig({ system_prompt: "You are a helpful assistant." }); + const result = transformLlmConfig(config); + t.absent( + "system_prompt" in result, + "system_prompt must not appear in C++ arg map", + ); + t.absent( + "system-prompt" in result, + "hyphenated system-prompt must not appear in C++ arg map", + ); +}); + +test("transformLlmConfig: modelType is never forwarded to C++", (t) => { + const config = makeConfig({}); + const result = transformLlmConfig(config); + t.absent("modelType" in result, "modelType must not appear in C++ arg map"); + t.absent("model_type" in result); +}); + +test("transformLlmConfig: reasoning_budget survives as underscore key", (t) => { + const config = makeConfig({ reasoning_budget: 0 }); + const result = transformLlmConfig(config); + t.is(result["reasoning_budget"], "0", "reasoning_budget=0 must be forwarded as string '0'"); +}); + +test("transformLlmConfig: reasoning_budget=-1 survives", (t) => { + const config = makeConfig({ reasoning_budget: -1 }); + const result = transformLlmConfig(config); + t.is(result["reasoning_budget"], "-1"); +}); + +test("transformLlmConfig: stop_sequences is renamed to reverse_prompt", (t) => { + const config = makeConfig({ stop_sequences: ["", "<|im_end|>"] }); + const result = transformLlmConfig(config); + t.absent("stop_sequences" in result); + t.is(result["reverse_prompt"], ", <|im_end|>"); +}); + +test("transformLlmConfig: numeric fields are stringified", (t) => { + const config = makeConfig({ ctx_size: 4096, gpu_layers: 99, temp: 0.7 }); + const result = transformLlmConfig(config); + t.is(result["ctx_size"], "4096"); + t.is(result["gpu_layers"], "99"); + t.is(result["temp"], "0.7"); +}); diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts index 2526bdaae8..1c03a54f2a 100644 --- a/packages/sdk/test/unit/tool-parser.test.ts +++ b/packages/sdk/test/unit/tool-parser.test.ts @@ -6,6 +6,8 @@ import { detectToolDialectFromName, } from "@/server/utils/tools"; import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony"; +import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35"; +import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native"; const weatherTool: Tool = { type: "function", name: "weather", @@ -268,6 +270,18 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => { [undefined, "/cache/abc_Llama-3.3-70B-Instruct-Tool-Calling.gguf"], [undefined, ""], ["", ""], + // Gemma 3 models (including 4B size variant) must not be detected as Gemma 4 + [undefined, "/cache/abc_gemma3-Q4_K_M.gguf"], + ["GEMMA3_Q4", "/Users/x/.qvac/models/abc_gemma-3-4b-q4_k_m.gguf"], + // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5) + [undefined, "/cache/abc_Qwen3-5B-Instruct-Q4_K_M.gguf"], + ["QWEN3_5B_INST", "/Users/x/.qvac/models/abc_qwen3-5b-instruct.gguf"], + [undefined, "/cache/abc_Qwen3-50B-Instruct-Q4_K_M.gguf"], + ["QWEN3_50B_INST", "/Users/x/.qvac/models/abc_qwen3-50b-instruct.gguf"], + [undefined, "/cache/abc_Qwen3-60B-Instruct-Q4_K_M.gguf"], + ["QWEN3_60B_INST", "/Users/x/.qvac/models/abc_qwen3-60b-instruct.gguf"], + // gemma-40b contains 'gemma-4' as a substring but the trailing '0' (digit) blocks the gemma4 lookahead + [undefined, "/cache/abc_gemma-40b-Q4_K_M.gguf"], ]; for (const [name, path] of cases) { @@ -601,3 +615,493 @@ test("detectToolDialectFromName: GPT-OSS variants → harmony", (t) => { t.is(detectToolDialectFromName(name, path), "harmony", `name=${name} path=${path}`); } }); + +test("detectToolDialectFromName: Qwen3.5 variants → qwen35", (t) => { + const cases: Array<[string | undefined, string]> = [ + [undefined, "/cache/abc_Qwen3.5-7B-Instruct-Q4_K_M.gguf"], + ["QWEN3_5_7B_INST_Q4", "/Users/x/.qvac/models/abc_qwen3.5-7b-instruct.gguf"], + [undefined, "/cache/abc_qwen3-5-7b.gguf"], + // Qwen3.6 shares the same Pythonic-XML tool-call format as Qwen3.5 + [undefined, "/cache/abc_Qwen3.6-7B-Instruct-Q4_K_M.gguf"], + ["QWEN3_6_7B_INST", "/Users/x/.qvac/models/abc_qwen3.6-7b-instruct.gguf"], + ]; + + for (const [name, path] of cases) { + t.is(detectToolDialectFromName(name, path), "qwen35", `name=${name} path=${path}`); + } +}); + +test("detectToolDialectFromName: Gemma 4 variants → gemma4", (t) => { + const cases: Array<[string | undefined, string]> = [ + [undefined, "/cache/abc_gemma4-9b-it-Q4_K_M.gguf"], + ["GEMMA4_27B_IT_Q4", "/Users/x/.qvac/models/abc_gemma-4-27b-it.gguf"], + [undefined, "/cache/abc_gemma4-27b.gguf"], + ]; + + for (const [name, path] of cases) { + t.is(detectToolDialectFromName(name, path), "gemma4", `name=${name} path=${path}`); + } +}); + +test("parseQwen35Format: single function call with parameters", (t) => { + const text = ` + +Paris +celsius + +`; + const result = parseQwen35Format(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 1); + t.is(result.toolCalls[0]?.name, "get_weather"); + t.alike(result.toolCalls[0]?.arguments, { city: "Paris", unit: "celsius" }); + t.is(result.errors.length, 0); +}); + +test("parseQwen35Format: no tool_call markers → matched=false", (t) => { + const result = parseQwen35Format("No tool call here.", pythonicTools); + t.is(result.matched, false); + t.is(result.toolCalls.length, 0); +}); + +test("parseQwen35Format: missing function tag → PARSE_ERROR", (t) => { + const text = `some plain content`; + const result = parseQwen35Format(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseToolCalls(dialect=qwen35): parses Qwen3.5 XML format", (t) => { + const text = `Tokyo`; + const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35"); + t.is(errors.length, 0); + t.is(toolCalls.length, 1); + t.is(toolCalls[0]?.name, "get_weather"); + t.alike(toolCalls[0]?.arguments, { city: "Tokyo" }); +}); + +test("parseGemma4NativeFormat: single call with string values", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,country:<|"|>FR<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 1); + t.is(result.toolCalls[0]?.name, "get_weather"); + t.alike(result.toolCalls[0]?.arguments, { city: "Paris", country: "FR" }); + t.is(result.errors.length, 0); +}); + +test("parseGemma4NativeFormat: no open marker → matched=false", (t) => { + const result = parseGemma4NativeFormat("No gemma call here.", pythonicTools); + t.is(result.matched, false); + t.is(result.toolCalls.length, 0); +}); + +test("parseGemma4NativeFormat: multiline string value is parsed correctly", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>line1\nline2<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls.length, 1); + t.is(result.toolCalls[0]?.arguments?.city, "line1\nline2"); +}); + +test("parseToolCalls(dialect=gemma4): parses Gemma4 native format", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>Berlin<|"|>}`; + const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "gemma4"); + t.is(errors.length, 0); + t.is(toolCalls.length, 1); + t.is(toolCalls[0]?.name, "get_weather"); + t.alike(toolCalls[0]?.arguments, { city: "Berlin" }); +}); + +// --- qwen35 coercion and error-surface tests --- + +test("parseQwen35Format: integer param is coerced to number", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, label: { type: "string" } }, + required: ["count"], + }, + }; + const text = `42hello`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls.length, 1); + t.is(result.toolCalls[0]?.arguments?.count, 42); + t.is(result.toolCalls[0]?.arguments?.label, "hello"); +}); + +test("parseQwen35Format: boolean param 'true' coerces to true", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, flag: { type: "boolean" } }, + required: ["count"], + }, + }; + const text = `1true`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.flag, true); +}); + +test("parseQwen35Format: boolean param 'false' coerces to false", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, flag: { type: "boolean" } }, + required: ["count"], + }, + }; + const text = `1false`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.flag, false); +}); + +test("parseQwen35Format: boolean param 'True' (uppercase) surfaces PARSE_ERROR", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, flag: { type: "boolean" } }, + required: ["count"], + }, + }; + const text = `1True`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: integer param 'not-a-number' surfaces PARSE_ERROR", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" } }, + required: ["count"], + }, + }; + const text = `not-a-number`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: integer param '1.5' (non-integer) surfaces PARSE_ERROR", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" } }, + required: ["count"], + }, + }; + const text = `1.5`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: malformed array param surfaces PARSE_ERROR (no raw-string fallback)", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, tags: { type: "array" } }, + required: ["count"], + }, + }; + const text = `1[1,2`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: malformed object param surfaces PARSE_ERROR (no raw-string fallback)", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, meta: { type: "object" } }, + required: ["count"], + }, + }; + const text = `1{bad json`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: array param is parsed from JSON", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" }, tags: { type: "array" } }, + required: ["count"], + }, + }; + const text = `1["a","b","c"]`; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.alike(result.toolCalls[0]?.arguments?.tags, ["a", "b", "c"]); +}); + +test("parseQwen35Format: multiple tool calls are all parsed", (t) => { + const text = `Paris +Aries`; + const result = parseQwen35Format(text, pythonicTools); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls.length, 2); + t.is(result.toolCalls[0]?.name, "get_weather"); + t.is(result.toolCalls[1]?.name, "get_horoscope"); +}); + +test("parseQwen35Format: unknown tool name surfaces UNKNOWN_TOOL", (t) => { + const text = `1`; + const result = parseQwen35Format(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "UNKNOWN_TOOL"); +}); + +test("parseQwen35Format: missing required param surfaces VALIDATION_ERROR", (t) => { + const text = `FR`; + const result = parseQwen35Format(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "VALIDATION_ERROR"); +}); + +test("parseToolCalls(dialect=qwen35): JSON inside tool_call falls through to hermes parser", (t) => { + const text = ` +{"name": "get_weather", "arguments": {"city": "Seoul"}} +`; + const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35"); + t.is(errors.length, 0); + t.is(toolCalls.length, 1); + t.is(toolCalls[0]?.name, "get_weather"); + t.alike(toolCalls[0]?.arguments, { city: "Seoul" }); +}); + +// --- gemma4 structural and error-surface tests --- + +test("parseGemma4NativeFormat: bare numeric arg is parsed as number", (t) => { + const numTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { type: "object", properties: { count: { type: "integer" } }, required: ["count"] }, + }; + const text = `<|tool_call>call:typed{count:7}`; + const result = parseGemma4NativeFormat(text, [numTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.count, 7); +}); + +test("parseGemma4NativeFormat: bare boolean arg is parsed as boolean", (t) => { + const boolTool: Tool = { + type: "function", + name: "toggle", + description: "toggle", + parameters: { type: "object", properties: { enabled: { type: "boolean" } }, required: ["enabled"] }, + }; + const text = `<|tool_call>call:toggle{enabled:true}`; + const result = parseGemma4NativeFormat(text, [boolTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.enabled, true); +}); + +test("parseGemma4NativeFormat: nested object arg is parsed correctly", (t) => { + const searchTool: Tool = { + type: "function", + name: "search", + description: "search", + parameters: { + type: "object", + properties: { query: { type: "string" }, filters: { type: "object" } }, + required: ["query"], + }, + }; + const text = `<|tool_call>call:search{query:<|"|>test<|"|>,filters:{active:true,limit:10}}`; + const result = parseGemma4NativeFormat(text, [searchTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.alike(result.toolCalls[0]?.arguments?.filters, { active: true, limit: 10 }); + t.is(result.toolCalls[0]?.arguments?.query, "test"); +}); + +test("parseGemma4NativeFormat: nested array arg is parsed correctly", (t) => { + const arrayTool: Tool = { + type: "function", + name: "get_weather", + description: "weather", + parameters: { + type: "object", + properties: { city: { type: "string" }, ids: { type: "array" } }, + required: ["city"], + }, + }; + const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,ids:[1,2,3]}`; + const result = parseGemma4NativeFormat(text, [arrayTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.alike(result.toolCalls[0]?.arguments?.ids, [1, 2, 3]); +}); + +test("parseGemma4NativeFormat: tab char in string value round-trips correctly", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>col1\tcol2<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.city, "col1\tcol2"); +}); + +test("parseGemma4NativeFormat: CR char in string value round-trips correctly", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>line1\rline2<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls[0]?.arguments?.city, "line1\rline2"); +}); + +test("parseGemma4NativeFormat: multiple tool calls are all parsed", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>London<|"|>} +<|tool_call>call:get_horoscope{sign:<|"|>Leo<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls.length, 2); + t.is(result.toolCalls[0]?.name, "get_weather"); + t.is(result.toolCalls[1]?.name, "get_horoscope"); +}); + +test("parseGemma4NativeFormat: unknown tool name surfaces UNKNOWN_TOOL", (t) => { + const text = `<|tool_call>call:unknown_fn{x:<|"|>y<|"|>}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "UNKNOWN_TOOL"); +}); + +test("parseGemma4NativeFormat: malformed args (trailing comma) surface PARSE_ERROR", (t) => { + const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,}`; + const result = parseGemma4NativeFormat(text, pythonicTools); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: empty integer param surfaces PARSE_ERROR (not 0)", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { count: { type: "integer" } }, + required: ["count"], + }, + }; + const text = ``; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseQwen35Format: whitespace-only number param surfaces PARSE_ERROR (not 0)", (t) => { + const typedTool: Tool = { + type: "function", + name: "typed", + description: "typed", + parameters: { + type: "object", + properties: { score: { type: "number" } }, + required: ["score"], + }, + }; + const text = ` `; + const result = parseQwen35Format(text, [typedTool]); + t.is(result.matched, true); + t.is(result.toolCalls.length, 0); + t.is(result.errors.length, 1); + t.is(result.errors[0]?.code, "PARSE_ERROR"); +}); + +test("parseGemma4NativeFormat: hyphenated tool name parses correctly", (t) => { + const hyphenTool: Tool = { + type: "function", + name: "get-weather", + description: "Get current weather", + parameters: { + type: "object", + properties: { city: { type: "string" } }, + required: ["city"], + }, + }; + const text = `<|tool_call>call:get-weather{city:<|"|>Tokyo<|"|>}`; + const result = parseGemma4NativeFormat(text, [hyphenTool]); + t.is(result.matched, true); + t.is(result.errors.length, 0); + t.is(result.toolCalls.length, 1); + t.is(result.toolCalls[0]?.name, "get-weather"); + t.alike(result.toolCalls[0]?.arguments, { city: "Tokyo" }); +}); + +test("parseToolCalls(default): Qwen3.5 XML format is recovered without explicit dialect", (t) => { + const text = `Berlin`; + const { toolCalls, errors } = parseToolCalls(text, pythonicTools); + t.is(errors.length, 0); + t.is(toolCalls.length, 1); + t.is(toolCalls[0]?.name, "get_weather"); + t.alike(toolCalls[0]?.arguments, { city: "Berlin" }); +}); diff --git a/packages/sdk/tests-qvac/tests/completion-tests.ts b/packages/sdk/tests-qvac/tests/completion-tests.ts index d64d44040a..d7a0c0e540 100644 --- a/packages/sdk/tests-qvac/tests/completion-tests.ts +++ b/packages/sdk/tests-qvac/tests/completion-tests.ts @@ -607,6 +607,28 @@ export const completionResponseFormatJsonSchema: TestDefinition = { metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 20000 }, }; +export const completionReasoningBudgetDisabled: TestDefinition = { + testId: "completion-reasoning-budget-disabled", + params: { + history: [{ role: "user", content: "What is 2+2? Answer with only the number." }], + stream: false, + generationParams: { reasoning_budget: 0, predict: 32 }, + }, + expectation: { validation: "type", expectedType: "string" }, + metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 }, +}; + +export const completionReasoningBudgetUnrestricted: TestDefinition = { + testId: "completion-reasoning-budget-unrestricted", + params: { + history: [{ role: "user", content: "What is 2+2? Answer with only the number." }], + stream: false, + generationParams: { reasoning_budget: -1, predict: 32 }, + }, + expectation: { validation: "type", expectedType: "string" }, + metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 }, +}; + export const completionResponseFormatWithToolsRejected: TestDefinition = { testId: "completion-response-format-with-tools-rejected", params: { @@ -677,4 +699,6 @@ export const completionTests = [ completionResponseFormatJsonObjectStreaming, completionResponseFormatJsonSchema, completionResponseFormatWithToolsRejected, + completionReasoningBudgetDisabled, + completionReasoningBudgetUnrestricted, ]; diff --git a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts index 9457fcaf8f..6204b35a94 100644 --- a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts +++ b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts @@ -29,6 +29,7 @@ interface GenerationParams { frequency_penalty?: number; presence_penalty?: number; repeat_penalty?: number; + reasoning_budget?: -1 | 0; } interface CompletionTestParams { diff --git a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts index be55a9a0ef..35cf887a1f 100644 --- a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts +++ b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts @@ -1,4 +1,5 @@ import { completion } from "@qvac/sdk"; +import type { ToolDialect } from "@qvac/sdk"; import { ValidationHelpers, type TestResult, @@ -24,9 +25,11 @@ export class ToolsExecutor extends AbstractModelExecutor { parameters: Record; }>; toolsMode?: "static" | "dynamic"; + toolDialect?: ToolDialect; + resourceKey?: string; stream?: boolean; }; - const resourceKey = p.toolsMode === "dynamic" ? "tools-dynamic" : "tools"; + const resourceKey = p.resourceKey ?? (p.toolsMode === "dynamic" ? "tools-dynamic" : "tools"); const toolsModelId = await this.resources.ensureLoaded(resourceKey); try { @@ -35,6 +38,7 @@ export class ToolsExecutor extends AbstractModelExecutor { history: p.history, tools: p.tools as never, stream: p.stream ?? false, + ...(p.toolDialect && { toolDialect: p.toolDialect }), }); const text = await result.text; diff --git a/packages/sdk/tests-qvac/tests/tools-tests.ts b/packages/sdk/tests-qvac/tests/tools-tests.ts index 271c3d62a5..72bac4b335 100644 --- a/packages/sdk/tests-qvac/tests/tools-tests.ts +++ b/packages/sdk/tests-qvac/tests/tools-tests.ts @@ -1,5 +1,6 @@ // Tools/Function calling test definitions import type { TestDefinition } from "@tetherto/qvac-test-suite"; +import type { ToolDialect } from "@qvac/sdk"; // Helper for creating tools tests const createToolsTest = ( @@ -21,6 +22,8 @@ const createToolsTest = ( expectedType: "string" | "number" | "array"; }; toolsMode?: "static" | "dynamic"; + toolDialect?: ToolDialect; + resourceKey?: string; suites?: string[]; } = {}, ): TestDefinition => { @@ -29,7 +32,7 @@ const createToolsTest = ( expectedType: "string" as const, }; const dependency = - options.toolsMode === "dynamic" ? "tools-dynamic" : "tools"; + options.resourceKey ?? (options.toolsMode === "dynamic" ? "tools-dynamic" : "tools"); return { testId, params: { @@ -37,6 +40,8 @@ const createToolsTest = ( tools, stream: false, ...(options.toolsMode && { toolsMode: options.toolsMode }), + ...(options.toolDialect && { toolDialect: options.toolDialect }), + ...(options.resourceKey && { resourceKey: options.resourceKey }), }, expectation, ...(options.suites && { suites: options.suites }),