From 0494ff4dbe1e15924f4a0c75af3ffe8d6a8ea10a Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Mon, 11 May 2026 14:51:56 +0100
Subject: [PATCH 01/13] feat[api]: add Qwen3.5, Gemma4 tool-call dialects and
 reasoning_budget param

- Extend toolDialectSchema with 'qwen35' and 'gemma4' values
- Add Qwen3.5 Pythonic-XML parser (qwen35.ts): <tool_call><function=NAME>
  <parameter=KEY>VALUE</parameter></function></tool_call>; string values are
  raw text, arrays/objects are JSON; type coercion from tool schema
- Add Gemma4 native parser (gemma4native.ts): <|tool_call>call:NAME{...}<tool_call|>;
  JS-literal args with <|"|> quote tokens, split-then-transliterate approach
  to safely quote bare keys without corrupting string values containing ', key:'
- Wire both parsers into parser.ts dispatch; add the Gemma4 parser to the default catch-all chain
- Add dialect specs to completion-normalizer.ts: qwen35 reuses <tool_call>
  framing; gemma4 has asymmetric <|tool_call>/<tool_call|> + thinking frames
- Auto-detect qwen35/gemma4 from model name/path in dialect.ts with guards
  against Gemma3+Q4 quant suffix and Qwen3 5B parameter-count collisions
- Add reasoning_budget (-1 | 0) to LlmConfig (load-time) and GenerationParams
  (per-request); passes through transformLlmConfig unchanged (snake_case key
  bypasses camelCase regex, number-to-string conversion handles the value)
- Mirror reasoning_budget in CLI SDKGenerationParams type
- Add tests-qvac completion tests for reasoning_budget passthrough
- Add tool-calling examples for qwen35 and gemma4 in examples/tools/
- Bump @qvac/llm-llamacpp to ^0.20.0 (adds reasoning_budget and new model
  support shipped in fabric-8189)
---
 packages/cli/src/serve/core/sdk.ts            |   1 +
 .../examples/tools/llamacpp-tools-gemma4.ts   |  94 ++++++++++++++
 .../examples/tools/llamacpp-tools-qwen35.ts   |  88 +++++++++++++
 packages/sdk/package.json                     |   2 +-
 packages/sdk/schemas/completion-stream.ts     |  10 +-
 packages/sdk/schemas/llamacpp-config.ts       |   4 +
 .../sdk/server/utils/completion-normalizer.ts |   8 ++
 packages/sdk/server/utils/tools/dialect.ts    |   2 +
 packages/sdk/server/utils/tools/parser.ts     |   8 ++
 .../utils/tools/parsers/gemma4native.ts       |  75 +++++++++++
 .../sdk/server/utils/tools/parsers/qwen35.ts  | 117 ++++++++++++++++++
 .../unit/completion-stream-schemas.test.ts    |  21 ++++
 .../sdk/test/unit/llm-config-schema.test.ts   |  14 +++
 packages/sdk/test/unit/tool-parser.test.ts    |  96 ++++++++++++++
 .../sdk/tests-qvac/tests/completion-tests.ts  |  24 ++++
 15 files changed, 562 insertions(+), 2 deletions(-)
 create mode 100644 packages/sdk/examples/tools/llamacpp-tools-gemma4.ts
 create mode 100644 packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
 create mode 100644 packages/sdk/server/utils/tools/parsers/gemma4native.ts
 create mode 100644 packages/sdk/server/utils/tools/parsers/qwen35.ts
diff --git a/packages/cli/src/serve/core/sdk.ts b/packages/cli/src/serve/core/sdk.ts
index b6db7823a8..faac11614e 100644
--- a/packages/cli/src/serve/core/sdk.ts
+++ b/packages/cli/src/serve/core/sdk.ts
@@ -11,6 +11,7 @@ export interface SDKGenerationParams {
   frequency_penalty?: number
   presence_penalty?: number
   repeat_penalty?: number
+  reasoning_budget?: -1 | 0
 }
 
 export type SDKResponseFormat =
diff --git a/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts
new file mode 100644
index 0000000000..d9aa1427ab
--- /dev/null
+++ b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts
@@ -0,0 +1,94 @@
+/**
+ * Tool-calling example using the Gemma4 native dialect.
+ *
+ * Gemma4 emits tool calls in a JS-literal format with custom quote tokens:
+ *   <|tool_call>call:NAME{key:<|"|>val<|"|>,...}<tool_call|>
+ *
+ * Reasoning output (thinking) is emitted inside <|channel>thought...<channel|>
+ * frames, which are stripped from contentDelta and forwarded as thinkingDelta
+ * when captureThinking is true.
+ *
+ * The dialect is auto-detected from the model name/path when the file name
+ * contains "gemma4" or "gemma-4". Pass toolDialect: "gemma4" explicitly to
+ * completion() if auto-detection does not pick it up for a given file name.
+ *
+ * Usage:
+ *   bun run bare:example dist/examples/tools/llamacpp-tools-gemma4.js <model-url>
+ */
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolCall,
+} from "@qvac/sdk";
+import { tools, mockExecute } from "./shared";
+
+// Default model: bartowski's GGUF pack tags <eos> as the EOG token (matching the
+// base tokenizer) so generation stops correctly; unsloth's variant maps it differently.
+const GEMMA4_HF =
+  "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
+
+// An optional first CLI argument overrides the default model source.
+const modelSrc = process.argv[2] ?? GEMMA4_HF;
+let modelId: string | undefined;
+
+try {
+  modelId = await loadModel({
+    modelSrc,
+    modelType: "llm",
+    modelConfig: { ctx_size: 4096, tools: true },
+    onProgress: ({ percentage }) =>
+      console.log(`Loading: ${percentage.toFixed(1)}%`),
+  });
+  console.log(`Model loaded: ${modelId}`);
+
+  // The user turn asks about both the weather and a horoscope.
+  const systemPrompt =
+    "You are a helpful assistant that can call tools to look up weather and horoscopes.";
+  const userPrompt = "What's the weather in Tokyo and my horoscope for Aquarius?";
+  const history = [
+    { role: "system", content: systemPrompt },
+    { role: "user", content: userPrompt },
+  ];
+
+  const result = completion({ modelId, history, stream: true, tools });
+
+  // Drain both streams concurrently: tokens are written to stdout while
+  // tool-call events are logged as they arrive.
+  const printTokens = async () => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  };
+  const printToolCalls = async () => {
+    for await (const event of result.toolCallStream) {
+      if (event.type !== "toolCall") continue;
+      const { name, arguments: callArgs } = event.call;
+      console.log(`\n-> ${name}(${JSON.stringify(callArgs)})`);
+    }
+  };
+  await Promise.all([printTokens(), printToolCalls()]);
+
+  // Once both streams are drained, read the final list of tool calls.
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  // Run every final call through the mock executor and print its result.
+  console.log("\n\nFinal tool calls:");
+  if (toolCalls.length === 0) {
+    console.log("  (none)");
+  } else {
+    for (const { name, arguments: callArgs } of toolCalls) {
+      console.log(`  - ${name}(${JSON.stringify(callArgs)})`);
+      console.log(`    result: ${mockExecute(name, callArgs)}`);
+    }
+  }
+
+  await unloadModel({ modelId, clearStorage: false });
+} catch (error) {
+  console.error("Error:", error);
+  // Best-effort cleanup: a failure while unloading is deliberately ignored.
+  if (modelId) {
+    await unloadModel({ modelId, clearStorage: false }).catch(() => {});
+  }
+  process.exit(1);
+}
diff --git a/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
new file mode 100644
index 0000000000..ebd2797c63
--- /dev/null
+++ b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
@@ -0,0 +1,88 @@
+/**
+ * Tool-calling example using the Qwen3.5 dialect.
+ *
+ * Qwen3.5 emits tool calls in a Pythonic-XML format:
+ *   <tool_call><function=NAME><parameter=KEY>VALUE</parameter></function></tool_call>
+ *
+ * The dialect is auto-detected from the model name/path when the model file
+ * contains "qwen3.5" or "qwen3-5". Pass toolDialect: "qwen35" explicitly to
+ * completion() if auto-detection does not pick it up for a given file name.
+ *
+ * Usage:
+ *   bun run bare:example dist/examples/tools/llamacpp-tools-qwen35.js <model-url>
+ */
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolCall,
+} from "@qvac/sdk";
+import { tools, mockExecute } from "./shared";
+
+const QWEN35_HF =
+  "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q8_0.gguf";
+
+// An optional first CLI argument overrides the default model source.
+const modelSrc = process.argv[2] ?? QWEN35_HF;
+let modelId: string | undefined;
+
+try {
+  modelId = await loadModel({
+    modelSrc,
+    modelType: "llm",
+    modelConfig: { ctx_size: 4096, tools: true },
+    onProgress: ({ percentage }) =>
+      console.log(`Loading: ${percentage.toFixed(1)}%`),
+  });
+  console.log(`Model loaded: ${modelId}`);
+
+  // The user turn asks about both the weather and a horoscope.
+  const systemPrompt =
+    "You are a helpful assistant that can call tools to look up weather and horoscopes.";
+  const userPrompt = "What's the weather in Tokyo and my horoscope for Aquarius?";
+  const history = [
+    { role: "system", content: systemPrompt },
+    { role: "user", content: userPrompt },
+  ];
+
+  const result = completion({ modelId, history, stream: true, tools });
+
+  // Drain both streams concurrently: tokens are written to stdout while
+  // tool-call events are logged as they arrive.
+  const printTokens = async () => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  };
+  const printToolCalls = async () => {
+    for await (const event of result.toolCallStream) {
+      if (event.type !== "toolCall") continue;
+      const { name, arguments: callArgs } = event.call;
+      console.log(`\n-> ${name}(${JSON.stringify(callArgs)})`);
+    }
+  };
+  await Promise.all([printTokens(), printToolCalls()]);
+
+  // Once both streams are drained, read the final list of tool calls.
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  // Run every final call through the mock executor and print its result.
+  console.log("\n\nFinal tool calls:");
+  if (toolCalls.length === 0) {
+    console.log("  (none)");
+  } else {
+    for (const { name, arguments: callArgs } of toolCalls) {
+      console.log(`  - ${name}(${JSON.stringify(callArgs)})`);
+      console.log(`    result: ${mockExecute(name, callArgs)}`);
+    }
+  }
+
+  await unloadModel({ modelId, clearStorage: false });
+} catch (error) {
+  console.error("Error:", error);
+  // Best-effort cleanup: a failure while unloading is deliberately ignored.
+  if (modelId) {
+    await unloadModel({ modelId, clearStorage: false }).catch(() => {});
+  }
+  process.exit(1);
+}
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index 70670c4a9a..fc66be62ff 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -177,7 +177,7 @@
     "@qvac/embed-llamacpp": "^0.15.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text": "^0.1.2",
-    "@qvac/llm-llamacpp": "^0.18.0",
+    "@qvac/llm-llamacpp": "^0.20.0",
     "@qvac/logging": "^0.1.0",
     "@qvac/ocr-onnx": "^0.4.2",
     "@qvac/rag": "^0.4.4",
diff --git a/packages/sdk/schemas/completion-stream.ts b/packages/sdk/schemas/completion-stream.ts
index 7f4b6fa1bb..68fe8c3d17 100644
--- a/packages/sdk/schemas/completion-stream.ts
+++ b/packages/sdk/schemas/completion-stream.ts
@@ -13,8 +13,10 @@ export { completionStatsSchema, type CompletionStats } from "./completion-event"
  * - `"pythonic"`: `[get_weather(city="Tokyo")]` (optionally `<|tool_call_start|>...<|tool_call_end|>`-wrapped)
  * - `"json"`:     `{"name":"get_weather","arguments":{"city":"Tokyo"}}` or `{"tool_calls":[{"name":"...","arguments":{...}}]}`
  * - `"harmony"`:  `<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"city":"Tokyo"}<|call|>`
+ * - `"qwen35"`:   `<tool_call><function=NAME><parameter=KEY>VALUE</parameter></function></tool_call>`
+ * - `"gemma4"`:   `<|tool_call>call:NAME{key:<|"|>val<|"|>,...}<tool_call|>`
  */
-export const toolDialectSchema = z.enum(["hermes", "pythonic", "json", "harmony"]);
+export const toolDialectSchema = z.enum(["hermes", "pythonic", "json", "harmony", "qwen35", "gemma4"]);
 
 export const attachmentSchema = z.object({
   path: z
@@ -65,6 +67,12 @@ export const generationParamsSchema = z
       .number()
       .optional()
       .describe("Penalty applied to repeated tokens."),
+    reasoning_budget: z
+      .union([z.literal(-1), z.literal(0)])
+      .optional()
+      .describe(
+        "Per-request reasoning channel budget. `-1` keeps the model's reasoning channel on; `0` disables it for this request. Equivalent to the load-time `reasoning_budget` config but scoped to a single `run()` call; the prior value is restored afterwards.",
+      ),
   })
   .strict();
 
diff --git a/packages/sdk/schemas/llamacpp-config.ts b/packages/sdk/schemas/llamacpp-config.ts
index f4f50eabad..0f8f36ee34 100644
--- a/packages/sdk/schemas/llamacpp-config.ts
+++ b/packages/sdk/schemas/llamacpp-config.ts
@@ -60,6 +60,10 @@ export const llmConfigBaseSchema = z.object({
    * for fast GPU startup.
    */
   openclCacheDir: z.string().optional(),
+  /**
+   * Reasoning channel token budget. `-1` = unrestricted, `0` = disabled.
+   */
+  reasoning_budget: z.union([z.literal(-1), z.literal(0)]).optional(),
   projectionModelSrc: modelSrcInputSchema.optional(),
 });
 
diff --git a/packages/sdk/server/utils/completion-normalizer.ts b/packages/sdk/server/utils/completion-normalizer.ts
index ccc1c782a8..a91b0fa319 100644
--- a/packages/sdk/server/utils/completion-normalizer.ts
+++ b/packages/sdk/server/utils/completion-normalizer.ts
@@ -71,6 +71,14 @@ const DIALECT_SPECS: Record<Dialect, DialectSpec> = {
       "<|return|>",
     ],
   },
+  qwen35: {
+    // Same <tool_call>…</tool_call> framing as hermes; inner content is XML.
+    toolFrames: [{ open: "<tool_call>", close: "</tool_call>" }],
+  },
+  gemma4: {
+    toolFrames: [{ open: "<|tool_call>", close: "<tool_call|>" }],
+    thinkingFrames: [{ open: "<|channel>thought", close: "<channel|>" }],
+  },
 };
 
 // Capture-gated reasoning marker — the generic `<think>...</think>`
diff --git a/packages/sdk/server/utils/tools/dialect.ts b/packages/sdk/server/utils/tools/dialect.ts
index 11bf614b53..a292aac1f8 100644
--- a/packages/sdk/server/utils/tools/dialect.ts
+++ b/packages/sdk/server/utils/tools/dialect.ts
@@ -12,6 +12,8 @@ export function detectToolDialectFromName(
   const basename = path.toLowerCase().split(/[/\\]/).pop() ?? "";
   const tag = `${(name ?? "").toLowerCase()}|${basename}`;
 
+  if (/qwen3[._-]?5(?![a-z])|qwen.*3\.5/.test(tag)) return "qwen35";
+  if (/gemma[-_]?4(?=[^a-z]|$)/.test(tag)) return "gemma4";
   if (/gpt[_-]?oss/.test(tag)) return "harmony";
   if (/lfm[_-]?\d/.test(tag)) return "pythonic";
   return "hermes";
diff --git a/packages/sdk/server/utils/tools/parser.ts b/packages/sdk/server/utils/tools/parser.ts
index 2a685593bf..d208df9fb3 100644
--- a/packages/sdk/server/utils/tools/parser.ts
+++ b/packages/sdk/server/utils/tools/parser.ts
@@ -11,6 +11,8 @@ import {
 import { parseHermesFormat } from "@/server/utils/tools/parsers/hermes";
 import { parsePythonicFormat } from "@/server/utils/tools/parsers/pythonic";
 import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony";
+import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35";
+import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native";
 
 function pickFormatParsers(
   dialect: ToolDialect | undefined,
@@ -26,12 +28,18 @@ function pickFormatParsers(
       return [parseGemmaFormat, parseLlamacppFormat];
     case "harmony":
       return [parseHarmonyFormat];
+    case "qwen35":
+      return [parseQwen35Format, parseHermesFormat];
+    case "gemma4":
+      return [parseGemma4NativeFormat];
     default:
       // Harmony first: `to=functions.` is uniquely Harmony and can't
       // false-match other dialects.
+      // Gemma4 goes ahead of Harmony: `<|tool_call>` is distinctive and won't false-match.
       // Pythonic last: its bare `[name(...)]` form can match payloads that
       // look like other dialects.
       return [
+        parseGemma4NativeFormat,
         parseHarmonyFormat,
         parseHermesFormat,
         parseGemmaFormat,
diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
new file mode 100644
index 0000000000..17f034292f
--- /dev/null
+++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
@@ -0,0 +1,75 @@
+import type { Tool, ToolCall, ToolCallError } from "@/schemas";
+import {
+  generateStableToolCallId,
+  validateToolArguments,
+  type ParserResult,
+} from "@/server/utils/tools/shared";
+
+// Transliterates Gemma 4's JS-literal argument body (without the outer braces)
+// into JSON text for JSON.parse. The body uses <|"|>...<|"|> instead of "..."
+// for string values and bare (unquoted) object keys.
+// Strategy: split on <|"|> so structural parts (even indices) and string
+// values (odd indices) are handled separately; the key-quoting regex then
+// never sees `, key:` patterns inside a string value. String values go through
+// JSON.stringify so quotes, backslashes and raw newlines/control chars are escaped.
+function gemmaArgsToJson(argsRaw: string): string {
+  const parts = ("{" + argsRaw + "}").split(/<\|"\|>/);
+  return parts
+    .map((part, i) =>
+      i % 2 === 0
+        ? part.replace(/([{,]\s*)([A-Za-z_]\w*)\s*:/g, '$1"$2":')
+        : JSON.stringify(part),
+    )
+    .join("");
+}
+
+// Parser for Gemma 4's native tool-call dialect. Each call is framed as
+//   <|tool_call>call:NAME{key:<|"|>val<|"|>,key2:bareNum,...}<tool_call|>
+// with bare identifier keys, <|"|>-delimited strings and bare number/boolean
+// literals (nested objects/arrays use the same syntax). The body is converted
+// to JSON and parsed; JSON and validation failures are collected in `errors`.
+export function parseGemma4NativeFormat(text: string, tools: Tool[]): ParserResult {
+  const toolCalls: ToolCall[] = [];
+  const errors: ToolCallError[] = [];
+
+  // Cheap substring probe: without the open marker this dialect cannot apply.
+  if (!text.includes("<|tool_call>")) {
+    return { matched: false, toolCalls, errors };
+  }
+
+  const frames = Array.from(
+    text.matchAll(/<\|tool_call>call:([A-Za-z_]\w*)\{([\s\S]*?)\}<tool_call\|>/g),
+  );
+  // Open marker present but no complete frame: report as not matched.
+  if (frames.length === 0) {
+    return { matched: false, toolCalls, errors };
+  }
+
+  for (const frame of frames) {
+    const raw = frame[0];
+    const name = frame[1]!;
+    const body = frame[2]!;
+    let parsed: Record<string, unknown>;
+    try {
+      parsed = JSON.parse(gemmaArgsToJson(body)) as Record<string, unknown>;
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      errors.push({
+        code: "PARSE_ERROR",
+        message: `Failed to parse Gemma 4 tool call arguments: ${reason}`,
+        raw,
+      });
+      continue;
+    }
+
+    const check = validateToolArguments(name, parsed, tools);
+    if (!check.isValid && check.error) {
+      errors.push({ ...check.error, raw });
+    } else {
+      const id = generateStableToolCallId(name, parsed);
+      toolCalls.push({ id, name, arguments: parsed, raw });
+    }
+  }
+
+  return { matched: true, toolCalls, errors };
+}
diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
new file mode 100644
index 0000000000..1ec49a5a03
--- /dev/null
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -0,0 +1,117 @@
+import type { Tool, ToolCall, ToolCallError } from "@/schemas";
+import {
+  generateStableToolCallId,
+  validateToolArguments,
+  type ParserResult,
+} from "@/server/utils/tools/shared";
+
+// Coerces raw <parameter> text to the type declared in the tool's JSON schema.
+// Text that cannot be coerced (non-numeric, not true/false, invalid JSON) is
+// returned as the trimmed string instead of NaN or a silent `false`.
+function coerceParamValue(raw: string, schema?: { type?: string }): unknown {
+  const trimmed = raw.trim();
+  if (!schema?.type) return trimmed;
+  switch (schema.type) {
+    case "number":
+    case "integer":
+      // Number("") is 0 and Number("abc") is NaN; neither is a real value.
+      return trimmed !== "" && Number.isFinite(Number(trimmed)) ? Number(trimmed) : trimmed;
+    case "boolean":
+      // Case-insensitive so Python-style True/False is accepted too.
+      return /^(true|false)$/i.test(trimmed) ? trimmed.toLowerCase() === "true" : trimmed;
+    case "array":
+    case "object":
+      try {
+        return JSON.parse(trimmed);
+      } catch {
+        return trimmed;
+      }
+    default:
+      return trimmed;
+  }
+}
+
+// Parser for the Qwen3.5/3.6 Pythonic-XML tool-call format:
+//   <tool_call>
+//   <function=NAME>
+//   <parameter=KEY>VALUE</parameter>
+//   </function>
+//   </tool_call>
+// Parameter values are raw text; coerceParamValue converts them using the
+// matching tool's schema, and parameters the schema does not list stay strings.
+export function parseQwen35Format(text: string, tools: Tool[]): ParserResult {
+  const toolCalls: ToolCall[] = [];
+  const errors: ToolCallError[] = [];
+
+  // Shared error shape for a frame without a usable <function=...> element.
+  const missingFunction = (raw: string): ToolCallError => ({
+    code: "PARSE_ERROR",
+    message: "Qwen3.5 tool call missing <function=NAME>...</function>",
+    raw,
+  });
+
+  if (!text.includes("<tool_call>")) {
+    return { matched: false, toolCalls, errors };
+  }
+
+  // Trimmed inner text of every complete <tool_call>...</tool_call> frame.
+  const bodies = Array.from(
+    text.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/g),
+    (frame) => frame[1]!.trim(),
+  );
+  if (bodies.length === 0) {
+    return { matched: false, toolCalls, errors };
+  }
+
+  // No frame uses the XML function syntax: JSON-looking content is reported as
+  // not matched (deferring to hermes); anything else is reported as malformed.
+  if (bodies.every((body) => !body.includes("<function="))) {
+    const looksLikeJson = bodies.some(
+      (body) => body.startsWith("{") || body.startsWith("["),
+    );
+    if (looksLikeJson) {
+      return { matched: false, toolCalls, errors };
+    }
+    return { matched: true, toolCalls, errors: bodies.map(missingFunction) };
+  }
+
+  // Each frame is parsed independently; a failure is recorded for that frame
+  // and the remaining frames are still processed.
+  for (const body of bodies) {
+    const fn = /<function=([^>\s]+)\s*>([\s\S]*?)<\/function>/i.exec(body);
+    if (!fn) {
+      errors.push(missingFunction(body));
+      continue;
+    }
+
+    const name = fn[1]!.trim();
+    const declared = tools.find((candidate) => candidate.name === name);
+    const properties = declared?.parameters?.properties ?? {};
+
+    // Collect every <parameter=KEY>VALUE</parameter> pair; a repeated key
+    // keeps its last value.
+    const args: Record<string, unknown> = {};
+    const params = Array.from(
+      fn[2]!.matchAll(/<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/gi),
+    );
+    for (const param of params) {
+      const key = param[1]!.trim();
+      args[key] = coerceParamValue(param[2]!, properties[key]);
+    }
+
+    const check = validateToolArguments(name, args, tools);
+    if (!check.isValid && check.error) {
+      errors.push({ ...check.error, raw: body });
+      continue;
+    }
+
+    toolCalls.push({
+      id: generateStableToolCallId(name, args),
+      name,
+      arguments: args,
+      raw: body,
+    });
+  }
+
+  return { matched: true, toolCalls, errors };
+}
diff --git a/packages/sdk/test/unit/completion-stream-schemas.test.ts b/packages/sdk/test/unit/completion-stream-schemas.test.ts
index 704b339927..37dac6aa47 100644
--- a/packages/sdk/test/unit/completion-stream-schemas.test.ts
+++ b/packages/sdk/test/unit/completion-stream-schemas.test.ts
@@ -3,6 +3,8 @@ import test from "brittle";
 import {
   completionStreamResponseSchema,
   completionStatsSchema,
+  generationParamsSchema,
+  toolDialectSchema,
 } from "@/schemas/completion-stream";
 
 test("completionStatsSchema: accepts backendDevice 'cpu' and 'gpu'", (t) => {
@@ -29,6 +31,25 @@ test("completionStatsSchema: backendDevice is optional", (t) => {
   t.is(result.success, true);
 });
 
+test("generationParamsSchema: accepts reasoning_budget -1 and 0", (t) => {
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: -1 }).success, true);
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: 0 }).success, true);
+});
+
+test("generationParamsSchema: rejects reasoning_budget other values", (t) => {
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: 1 }).success, false);
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: -2 }).success, false);
+});
+
+test("toolDialectSchema: accepts qwen35 and gemma4", (t) => {
+  t.is(toolDialectSchema.safeParse("qwen35").success, true);
+  t.is(toolDialectSchema.safeParse("gemma4").success, true);
+});
+
+test("toolDialectSchema: rejects unknown dialects", (t) => {
+  t.is(toolDialectSchema.safeParse("unknown").success, false);
+});
+
 test("completionStreamResponseSchema: round-trips backendDevice through completionStats event", (t) => {
   const result = completionStreamResponseSchema.safeParse({
     type: "completionStream",
diff --git a/packages/sdk/test/unit/llm-config-schema.test.ts b/packages/sdk/test/unit/llm-config-schema.test.ts
index 3f23eb9184..da2dd1bcb6 100644
--- a/packages/sdk/test/unit/llm-config-schema.test.ts
+++ b/packages/sdk/test/unit/llm-config-schema.test.ts
@@ -101,3 +101,17 @@ test("loadModelSrcRequestSchema: accepts split-mode for LLM", (t) => {
   });
   t.is(result.success, true);
 });
+
+test("llmConfigBaseSchema: accepts reasoning_budget -1 (unrestricted)", (t) => {
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -1 }).success, true);
+});
+
+test("llmConfigBaseSchema: accepts reasoning_budget 0 (disabled)", (t) => {
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0 }).success, true);
+});
+
+test("llmConfigBaseSchema: rejects reasoning_budget other values", (t) => {
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 1 }).success, false);
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -2 }).success, false);
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0.5 }).success, false);
+});
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index 2526bdaae8..12b531f109 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -6,6 +6,8 @@ import {
   detectToolDialectFromName,
 } from "@/server/utils/tools";
 import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony";
+import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35";
+import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native";
 const weatherTool: Tool = {
   type: "function",
   name: "weather",
@@ -268,6 +270,12 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     [undefined, "/cache/abc_Llama-3.3-70B-Instruct-Tool-Calling.gguf"],
     [undefined, ""],
     ["", ""],
+    // Gemma3 with Q4 quantization suffix must not be mistaken for Gemma4 model generation
+    [undefined, "/cache/abc_gemma3-Q4_K_M.gguf"],
+    ["GEMMA3_Q4", "/Users/x/.qvac/models/abc_gemma-3-4b-q4_k_m.gguf"],
+    // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5)
+    [undefined, "/cache/abc_Qwen3-5B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_5B_INST", "/Users/x/.qvac/models/abc_qwen3-5b-instruct.gguf"],
   ];
 
   for (const [name, path] of cases) {
@@ -601,3 +609,91 @@ test("detectToolDialectFromName: GPT-OSS variants → harmony", (t) => {
     t.is(detectToolDialectFromName(name, path), "harmony", `name=${name} path=${path}`);
   }
 });
+
+test("detectToolDialectFromName: Qwen3.5 variants → qwen35", (t) => {
+  const cases: Array<[string | undefined, string]> = [
+    [undefined, "/cache/abc_Qwen3.5-7B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_5_7B_INST_Q4", "/Users/x/.qvac/models/abc_qwen3.5-7b-instruct.gguf"],
+    [undefined, "/cache/abc_qwen3-5-7b.gguf"],
+  ];
+
+  for (const [name, path] of cases) {
+    t.is(detectToolDialectFromName(name, path), "qwen35", `name=${name} path=${path}`);
+  }
+});
+
+test("detectToolDialectFromName: Gemma 4 variants → gemma4", (t) => {
+  const cases: Array<[string | undefined, string]> = [
+    [undefined, "/cache/abc_gemma4-9b-it-Q4_K_M.gguf"],
+    ["GEMMA4_27B_IT_Q4", "/Users/x/.qvac/models/abc_gemma-4-27b-it.gguf"],
+    [undefined, "/cache/abc_gemma4-27b.gguf"],
+  ];
+
+  for (const [name, path] of cases) {
+    t.is(detectToolDialectFromName(name, path), "gemma4", `name=${name} path=${path}`);
+  }
+});
+
+test("parseQwen35Format: single function call with parameters", (t) => {
+  const text = `<tool_call>
+<function=get_weather>
+<parameter=city>Paris</parameter>
+<parameter=unit>celsius</parameter>
+</function>
+</tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.alike(result.toolCalls[0]?.arguments, { city: "Paris", unit: "celsius" });
+  t.is(result.errors.length, 0);
+});
+
+test("parseQwen35Format: no tool_call markers → matched=false", (t) => {
+  const result = parseQwen35Format("No tool call here.", pythonicTools);
+  t.is(result.matched, false);
+  t.is(result.toolCalls.length, 0);
+});
+
+test("parseQwen35Format: missing function tag → PARSE_ERROR", (t) => {
+  const text = `<tool_call>some plain content</tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseToolCalls(dialect=qwen35): parses Qwen3.5 XML format", (t) => {
+  const text = `<tool_call><function=get_weather><parameter=city>Tokyo</parameter></function></tool_call>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35");
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Tokyo" });
+});
+
+test("parseGemma4NativeFormat: single call with string values", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,country:<|"|>FR<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.alike(result.toolCalls[0]?.arguments, { city: "Paris", country: "FR" });
+  t.is(result.errors.length, 0);
+});
+
+test("parseGemma4NativeFormat: no open marker → matched=false", (t) => {
+  const result = parseGemma4NativeFormat("No gemma call here.", pythonicTools);
+  t.is(result.matched, false);
+  t.is(result.toolCalls.length, 0);
+});
+
+test("parseToolCalls(dialect=gemma4): parses Gemma4 native format", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>Berlin<|"|>}<tool_call|>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "gemma4");
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Berlin" });
+});
diff --git a/packages/sdk/tests-qvac/tests/completion-tests.ts b/packages/sdk/tests-qvac/tests/completion-tests.ts
index d64d44040a..d7a0c0e540 100644
--- a/packages/sdk/tests-qvac/tests/completion-tests.ts
+++ b/packages/sdk/tests-qvac/tests/completion-tests.ts
@@ -607,6 +607,28 @@ export const completionResponseFormatJsonSchema: TestDefinition = {
   metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 20000 },
 };
 
+export const completionReasoningBudgetDisabled: TestDefinition = {
+  testId: "completion-reasoning-budget-disabled",
+  params: {
+    history: [{ role: "user", content: "What is 2+2? Answer with only the number." }],
+    stream: false,
+    generationParams: { reasoning_budget: 0, predict: 32 },
+  },
+  expectation: { validation: "type", expectedType: "string" },
+  metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 },
+};
+
+export const completionReasoningBudgetUnrestricted: TestDefinition = {
+  testId: "completion-reasoning-budget-unrestricted",
+  params: {
+    history: [{ role: "user", content: "What is 2+2? Answer with only the number." }],
+    stream: false,
+    generationParams: { reasoning_budget: -1, predict: 32 },
+  },
+  expectation: { validation: "type", expectedType: "string" },
+  metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 },
+};
+
 export const completionResponseFormatWithToolsRejected: TestDefinition = {
   testId: "completion-response-format-with-tools-rejected",
   params: {
@@ -677,4 +699,6 @@ export const completionTests = [
   completionResponseFormatJsonObjectStreaming,
   completionResponseFormatJsonSchema,
   completionResponseFormatWithToolsRejected,
+  completionReasoningBudgetDisabled,
+  completionReasoningBudgetUnrestricted,
 ];

From d6c7a395f4719c38ecbca330f5e93ea1e84cff03 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Mon, 11 May 2026 15:16:54 +0100
Subject: [PATCH 02/13] fix: exclude system_prompt from C++ config transform;
 add reasoning_budget to completion-executor

llamacpp 8189+ (in @qvac/llm-llamacpp@0.20.0) removed --system-prompt from its
CLI argument parser. The SDK was forwarding system_prompt through
transformLlmConfig, causing all model loads to fail with
'invalid argument: --system-prompt'.

system_prompt is JS-only: completion-stream.ts reads it to seed the conversation
history. It has no meaning at the C++ level and must be excluded alongside modelType.

Also mirrors reasoning_budget in completion-executor.ts GenerationParams so the
new tests-qvac reasoning_budget tests type-check correctly.
---
 packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts  | 2 +-
 .../tests-qvac/tests/shared/executors/completion-executor.ts    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
index ef0b60b7f3..eae639af5e 100644
--- a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
@@ -34,7 +34,7 @@ import { detectToolDialect } from "@/server/utils/tool-integration";
 function transformLlmConfig(llmConfig: LlmConfig) {
   const transformed = JSON.parse(
     JSON.stringify(llmConfig, (key: string, v: unknown) =>
-      key === "modelType"
+      key === "modelType" || key === "system_prompt"
         ? undefined
         : key === "stop_sequences"
           ? Array.isArray(v)
diff --git a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
index 9457fcaf8f..6204b35a94 100644
--- a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
+++ b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
@@ -29,6 +29,7 @@ interface GenerationParams {
   frequency_penalty?: number;
   presence_penalty?: number;
   repeat_penalty?: number;
+  reasoning_budget?: -1 | 0;
 }
 
 interface CompletionTestParams {

From 319db098c91f18734a5c2cb351d5239bbbabd0a6 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Mon, 11 May 2026 16:27:05 +0100
Subject: [PATCH 03/13] fix: tighten dialect regexes, extract
 transformLlmConfig, add exclusion tests

- Drop the over-broad qwen.*3\.5 alternative from the qwen35 regex and
  tighten the lookahead to (?![a-z0-9]) so qwen3-50b-instruct no longer
  false-matches as qwen35
- Tighten gemma4 lookahead to (?=[^a-z0-9]|$) so gemma-40b no longer
  false-matches as gemma4
- Extract transformLlmConfig to transform.ts (no addon imports) so it
  can be unit-tested without loading the native addon
- Add llm-plugin-transform.test.ts pinning that system_prompt and
  modelType are never forwarded to C++ and that reasoning_budget survives
- Add negative test cases for qwen3-50b and gemma-40b to tool-parser.test.ts
- Fix stale default-chain comment in parser.ts (was 'Harmony first',
  actual order is Gemma4 first)
- Add inline justification for qwen35/gemma4 fallback asymmetry
---
 packages/sdk/schemas/llamacpp-config.ts       |  1 +
 .../plugins/llamacpp-completion/plugin.ts     | 40 +-------------
 .../plugins/llamacpp-completion/transform.ts  | 47 ++++++++++++++++
 packages/sdk/server/utils/tools/dialect.ts    |  4 +-
 packages/sdk/server/utils/tools/parser.ts     |  8 ++-
 .../test/unit/llm-plugin-transform.test.ts    | 55 +++++++++++++++++++
 packages/sdk/test/unit/tool-parser.test.ts    |  5 ++
 7 files changed, 117 insertions(+), 43 deletions(-)
 create mode 100644 packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts
 create mode 100644 packages/sdk/test/unit/llm-plugin-transform.test.ts

diff --git a/packages/sdk/schemas/llamacpp-config.ts b/packages/sdk/schemas/llamacpp-config.ts
index 0f8f36ee34..9cd70e6877 100644
--- a/packages/sdk/schemas/llamacpp-config.ts
+++ b/packages/sdk/schemas/llamacpp-config.ts
@@ -33,6 +33,7 @@ export const llmConfigBaseSchema = z.object({
       z.number().int().min(1), // positive integer: fixed token count
     ])
     .optional(),
+  /** JS-side only: seeds conversation history. Never forwarded to the C++ addon. */
   system_prompt: z.string().optional(),
   no_mmap: z.boolean().optional(),
   verbosity: verbositySchema.optional(),
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
index eae639af5e..2189da4213 100644
--- a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
@@ -12,7 +12,6 @@ import {
   ModelType,
   llmConfigBaseSchema,
   ADDON_LLM,
-  TOOLS_MODE,
   type CompletionEvent,
   type CreateModelParams,
   type PluginCapabilities,
@@ -26,49 +25,12 @@ import { expandGGUFIntoShards } from "@/server/utils";
 import { completion } from "@/server/bare/plugins/llamacpp-completion/ops/completion-stream";
 import { finetune } from "@/server/bare/plugins/llamacpp-completion/ops/finetune";
 import { translate } from "@/server/bare/ops/translate";
+import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform";
 import { attachModelExecutionMs } from "@/profiling/model-execution";
 import { getModelConfig } from "@/server/bare/registry/model-registry";
 import { createCompletionNormalizer } from "@/server/utils/completion-normalizer";
 import { detectToolDialect } from "@/server/utils/tool-integration";
 
-function transformLlmConfig(llmConfig: LlmConfig) {
-  const transformed = JSON.parse(
-    JSON.stringify(llmConfig, (key: string, v: unknown) =>
-      key === "modelType" || key === "system_prompt"
-        ? undefined
-        : key === "stop_sequences"
-          ? Array.isArray(v)
-            ? v.join(", ")
-            : v
-          : typeof v === "number" || typeof v === "boolean"
-            ? String(v)
-            : v,
-    ).replace(
-      /"([a-z][A-Za-z]*)":/g,
-      (_, key: string) =>
-        `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`,
-    ),
-  ) as Record<string, string>;
-
-  if ("stop_sequences" in transformed) {
-    transformed["reverse_prompt"] = transformed["stop_sequences"];
-    delete transformed["stop_sequences"];
-  }
-
-  if ("opencl_cache_dir" in transformed) {
-    transformed["openclCacheDir"] = transformed["opencl_cache_dir"];
-    delete transformed["opencl_cache_dir"];
-  }
-
-  if ("tools_mode" in transformed) {
-    if (transformed["tools_mode"] === TOOLS_MODE.dynamic) {
-      transformed["tools_compact"] = "true";
-    }
-    delete transformed["tools_mode"];
-  }
-
-  return transformed;
-}
 
 function createLlmModel(
   modelId: string,
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts
new file mode 100644
index 0000000000..9c4dd358ca
--- /dev/null
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts
@@ -0,0 +1,47 @@
+import { TOOLS_MODE, type LlmConfig } from "@/schemas";
+
+/**
+ * Converts an LlmConfig into the flat string-keyed map the C++ addon expects.
+ *
+ * JS-only fields excluded from the output (must NOT be forwarded to the addon):
+ *   - modelType   (schema discriminant, meaningless at C++ level)
+ *   - system_prompt  (JS-side history seeding only; C++ removed --system-prompt in 8189)
+ */
+export function transformLlmConfig(llmConfig: LlmConfig) {
+  const transformed = JSON.parse(
+    JSON.stringify(llmConfig, (key: string, v: unknown) =>
+      key === "modelType" || key === "system_prompt"
+        ? undefined
+        : key === "stop_sequences"
+          ? Array.isArray(v)
+            ? v.join(", ")
+            : v
+          : typeof v === "number" || typeof v === "boolean"
+            ? String(v)
+            : v,
+    ).replace(
+      /"([a-z][A-Za-z]*)":/g,
+      (_, key: string) =>
+        `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`,
+    ),
+  ) as Record<string, string>;
+
+  if ("stop_sequences" in transformed) {
+    transformed["reverse_prompt"] = transformed["stop_sequences"];
+    delete transformed["stop_sequences"];
+  }
+
+  if ("opencl_cache_dir" in transformed) {
+    transformed["openclCacheDir"] = transformed["opencl_cache_dir"];
+    delete transformed["opencl_cache_dir"];
+  }
+
+  if ("tools_mode" in transformed) {
+    if (transformed["tools_mode"] === TOOLS_MODE.dynamic) {
+      transformed["tools_compact"] = "true";
+    }
+    delete transformed["tools_mode"];
+  }
+
+  return transformed;
+}
diff --git a/packages/sdk/server/utils/tools/dialect.ts b/packages/sdk/server/utils/tools/dialect.ts
index a292aac1f8..c8f78cfa84 100644
--- a/packages/sdk/server/utils/tools/dialect.ts
+++ b/packages/sdk/server/utils/tools/dialect.ts
@@ -12,8 +12,8 @@ export function detectToolDialectFromName(
   const basename = path.toLowerCase().split(/[/\\]/).pop() ?? "";
   const tag = `${(name ?? "").toLowerCase()}|${basename}`;
 
-  if (/qwen3[._-]?5(?![a-z])|qwen.*3\.5/.test(tag)) return "qwen35";
-  if (/gemma[-_]?4(?=[^a-z]|$)/.test(tag)) return "gemma4";
+  if (/qwen3[._-]?5(?![a-z0-9])/.test(tag)) return "qwen35";
+  if (/gemma[-_]?4(?=[^a-z0-9]|$)/.test(tag)) return "gemma4";
   if (/gpt[_-]?oss/.test(tag)) return "harmony";
   if (/lfm[_-]?\d/.test(tag)) return "pythonic";
   return "hermes";
diff --git a/packages/sdk/server/utils/tools/parser.ts b/packages/sdk/server/utils/tools/parser.ts
index d208df9fb3..5a49688d3d 100644
--- a/packages/sdk/server/utils/tools/parser.ts
+++ b/packages/sdk/server/utils/tools/parser.ts
@@ -29,13 +29,17 @@ function pickFormatParsers(
     case "harmony":
       return [parseHarmonyFormat];
     case "qwen35":
+      // Hermes fallback: Qwen3.5 templates sometimes emit OpenAI-style JSON
+      // when the native XML format fails; Hermes chain recovers those.
       return [parseQwen35Format, parseHermesFormat];
     case "gemma4":
+      // No JSON fallback: Gemma4 emits only its native channel-thought dialect
+      // and never falls back to JSON-envelope formats.
       return [parseGemma4NativeFormat];
     default:
-      // Harmony first: `to=functions.` is uniquely Harmony and can't
+      // Gemma4 first: `<|tool_call>` is uniquely distinctive and can't
       // false-match other dialects.
-      // Gemma4 next: `<|tool_call>` is distinctive and won't false-match.
+      // Harmony next: `to=functions.` is also uniquely Harmony.
       // Pythonic last: its bare `[name(...)]` form can match payloads that
       // look like other dialects.
       return [
diff --git a/packages/sdk/test/unit/llm-plugin-transform.test.ts b/packages/sdk/test/unit/llm-plugin-transform.test.ts
new file mode 100644
index 0000000000..f5b7f66ccb
--- /dev/null
+++ b/packages/sdk/test/unit/llm-plugin-transform.test.ts
@@ -0,0 +1,55 @@
+// @ts-expect-error brittle has no type declarations
+import test from "brittle";
+import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform";
+import { llmConfigSchema } from "@/schemas/llamacpp-config";
+
+function makeConfig(overrides: Record<string, unknown> = {}) {
+  return llmConfigSchema.parse(overrides);
+}
+
+test("transformLlmConfig: system_prompt is never forwarded to C++", (t) => {
+  const config = makeConfig({ system_prompt: "You are a helpful assistant." });
+  const result = transformLlmConfig(config);
+  t.absent(
+    "system_prompt" in result,
+    "system_prompt must not appear in C++ arg map",
+  );
+  t.absent(
+    "system-prompt" in result,
+    "hyphenated system-prompt must not appear in C++ arg map",
+  );
+});
+
+test("transformLlmConfig: modelType is never forwarded to C++", (t) => {
+  const config = makeConfig({});
+  const result = transformLlmConfig(config);
+  t.absent("modelType" in result, "modelType must not appear in C++ arg map");
+  t.absent("model_type" in result);
+});
+
+test("transformLlmConfig: reasoning_budget survives as underscore key", (t) => {
+  const config = makeConfig({ reasoning_budget: 0 });
+  const result = transformLlmConfig(config);
+  t.is(result["reasoning_budget"], "0", "reasoning_budget=0 must be forwarded as string '0'");
+});
+
+test("transformLlmConfig: reasoning_budget=-1 survives", (t) => {
+  const config = makeConfig({ reasoning_budget: -1 });
+  const result = transformLlmConfig(config);
+  t.is(result["reasoning_budget"], "-1");
+});
+
+test("transformLlmConfig: stop_sequences is renamed to reverse_prompt", (t) => {
+  const config = makeConfig({ stop_sequences: ["</s>", "<|im_end|>"] });
+  const result = transformLlmConfig(config);
+  t.absent("stop_sequences" in result);
+  t.is(result["reverse_prompt"], "</s>, <|im_end|>");
+});
+
+test("transformLlmConfig: numeric fields are stringified", (t) => {
+  const config = makeConfig({ ctx_size: 4096, gpu_layers: 99, temp: 0.7 });
+  const result = transformLlmConfig(config);
+  t.is(result["ctx_size"], "4096");
+  t.is(result["gpu_layers"], "99");
+  t.is(result["temp"], "0.7");
+});
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index 12b531f109..b95f1c867d 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -276,6 +276,11 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5)
     [undefined, "/cache/abc_Qwen3-5B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_5B_INST", "/Users/x/.qvac/models/abc_qwen3-5b-instruct.gguf"],
+    // Qwen3 50B must not be mistaken for Qwen3.5 (digit after 5, not a letter)
+    [undefined, "/cache/abc_Qwen3-50B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_50B_INST", "/Users/x/.qvac/models/abc_qwen3-50b-instruct.gguf"],
+    // Gemma-4 billion params (generation 3, 4B size) must not be mistaken for Gemma 4 generation
+    [undefined, "/cache/abc_gemma-40b-Q4_K_M.gguf"],
   ];
 
   for (const [name, path] of cases) {

From d9b82db087a865d8cc587e3a96f0e68a5459e932 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Mon, 11 May 2026 19:30:59 +0100
Subject: [PATCH 04/13] fix: extend qwen35 dialect to Qwen3.6; escape newlines
 in Gemma4 arg transliterator

---
 packages/sdk/server/utils/tools/dialect.ts        |  2 +-
 .../server/utils/tools/parsers/gemma4native.ts    |  2 +-
 packages/sdk/test/unit/tool-parser.test.ts        | 15 +++++++++++++++
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/packages/sdk/server/utils/tools/dialect.ts b/packages/sdk/server/utils/tools/dialect.ts
index c8f78cfa84..bbbade9228 100644
--- a/packages/sdk/server/utils/tools/dialect.ts
+++ b/packages/sdk/server/utils/tools/dialect.ts
@@ -12,7 +12,7 @@ export function detectToolDialectFromName(
   const basename = path.toLowerCase().split(/[/\\]/).pop() ?? "";
   const tag = `${(name ?? "").toLowerCase()}|${basename}`;
 
-  if (/qwen3[._-]?5(?![a-z0-9])/.test(tag)) return "qwen35";
+  if (/qwen3[._-]?[56](?![a-z0-9])/.test(tag)) return "qwen35";
   if (/gemma[-_]?4(?=[^a-z0-9]|$)/.test(tag)) return "gemma4";
   if (/gpt[_-]?oss/.test(tag)) return "harmony";
   if (/lfm[_-]?\d/.test(tag)) return "pythonic";
diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
index 17f034292f..248bca76f5 100644
--- a/packages/sdk/server/utils/tools/parsers/gemma4native.ts
+++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
@@ -18,7 +18,7 @@ function gemmaArgsToJson(argsRaw: string): string {
     .map((part, i) =>
       i % 2 === 0
         ? part.replace(/([{,]\s*)([A-Za-z_]\w*)\s*:/g, '$1"$2":')
-        : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"') + '"',
+        : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\r/g, "\\r") + '"',
     )
     .join("");
 }
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index b95f1c867d..a5a3eb2f2a 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -279,6 +279,9 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     // Qwen3 50B must not be mistaken for Qwen3.5 (digit after 5, not a letter)
     [undefined, "/cache/abc_Qwen3-50B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_50B_INST", "/Users/x/.qvac/models/abc_qwen3-50b-instruct.gguf"],
+    // Qwen3 60B must not be mistaken for Qwen3.6 (digit after 6, not a letter)
+    [undefined, "/cache/abc_Qwen3-60B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_60B_INST", "/Users/x/.qvac/models/abc_qwen3-60b-instruct.gguf"],
     // Gemma-4 billion params (generation 3, 4B size) must not be mistaken for Gemma 4 generation
     [undefined, "/cache/abc_gemma-40b-Q4_K_M.gguf"],
   ];
@@ -620,6 +623,9 @@ test("detectToolDialectFromName: Qwen3.5 variants → qwen35", (t) => {
     [undefined, "/cache/abc_Qwen3.5-7B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_5_7B_INST_Q4", "/Users/x/.qvac/models/abc_qwen3.5-7b-instruct.gguf"],
     [undefined, "/cache/abc_qwen3-5-7b.gguf"],
+    // Qwen3.6 shares the same Pythonic-XML tool-call format as Qwen3.5
+    [undefined, "/cache/abc_Qwen3.6-7B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_6_7B_INST", "/Users/x/.qvac/models/abc_qwen3.6-7b-instruct.gguf"],
   ];
 
   for (const [name, path] of cases) {
@@ -694,6 +700,15 @@ test("parseGemma4NativeFormat: no open marker → matched=false", (t) => {
   t.is(result.toolCalls.length, 0);
 });
 
+test("parseGemma4NativeFormat: multiline string value is parsed correctly", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>line1\nline2<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.arguments?.city, "line1\nline2");
+});
+
 test("parseToolCalls(dialect=gemma4): parses Gemma4 native format", (t) => {
   const text = `<|tool_call>call:get_weather{city:<|"|>Berlin<|"|>}<tool_call|>`;
   const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "gemma4");

From c6d1933ac2fe6f9399f0edff1982983868e22a4d Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Mon, 11 May 2026 20:39:03 +0100
Subject: [PATCH 05/13] fix: update toolDialect docs to list all dialects; add
 qwen35/gemma4 normalizer tests

---
 packages/sdk/client/api/completion-stream.ts  |  2 +-
 .../examples/tools/llamacpp-tools-qwen35.ts   |  4 +-
 .../test/unit/completion-normalizer.test.ts   | 88 +++++++++++++++++++
 3 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/packages/sdk/client/api/completion-stream.ts b/packages/sdk/client/api/completion-stream.ts
index eac63fd1ab..9172359280 100644
--- a/packages/sdk/client/api/completion-stream.ts
+++ b/packages/sdk/client/api/completion-stream.ts
@@ -54,7 +54,7 @@ type CompletionParams = Omit<CompletionClientParams, "tools"> & {
  * @param params.mcp - Optional array of MCP client inputs for tool integration
  * @param params.captureThinking - Best-effort parsing of `<think>` blocks into `thinkingDelta` events; `final.raw.fullText` always preserves the original output
  * @param params.emitRawDeltas - When true, every raw model token is also emitted as a `rawDelta` event
- * @param params.toolDialect - Override the SDK's name-based dialect detection. Use when your model emits a known format (`"hermes"`, `"pythonic"`, or `"json"`) the auto-router doesn't recognise. Drives both streaming frame detection and finalization parsing.
+ * @param params.toolDialect - Override the SDK's name-based dialect detection. Supported values: `"hermes"`, `"pythonic"`, `"json"`, `"harmony"`, `"qwen35"` (Qwen3.5/3.6), `"gemma4"`. Use when the auto-router doesn't recognise your model name. Drives both streaming frame detection and finalization parsing.
  * Common override case: Llama 3.x tool-calling fine-tunes that emit the native pythonic header (`<|start_header_id|>tool_call<|end_header_id|>...<|eot_id|>`).
  * @param params.responseFormat - Optional structured-output constraint applied to the model's output:
  *   - `{ type: "text" }` — no constraint (default behavior)
diff --git a/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
index ebd2797c63..7d8c571454 100644
--- a/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
+++ b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
@@ -5,8 +5,8 @@
  *   <tool_call><function=NAME><parameter=KEY>VALUE</parameter></function></tool_call>
  *
  * The dialect is auto-detected from the model name/path when the model file
- * contains "qwen3.5" or "qwen3-5". Pass toolDialect: "qwen35" explicitly to
- * completion() if auto-detection does not pick it up for a given file name.
+ * contains "qwen3.5", "qwen3-5", "qwen3.6", or "qwen3-6". Pass
+ * toolDialect: "qwen35" explicitly if auto-detection does not pick it up.
  *
  * Usage:
  *   bun run bare:example dist/examples/tools/llamacpp-tools-qwen35.js <model-url>
diff --git a/packages/sdk/test/unit/completion-normalizer.test.ts b/packages/sdk/test/unit/completion-normalizer.test.ts
index f92a8f9c48..f5b92f7575 100644
--- a/packages/sdk/test/unit/completion-normalizer.test.ts
+++ b/packages/sdk/test/unit/completion-normalizer.test.ts
@@ -770,3 +770,91 @@ test("harmony spec defined but hermes dialect still strips <think> as before", (
   t.alike(texts(events, "thinkingDelta"), ["thought"]);
   t.alike(texts(events, "contentDelta"), ["A", "B"]);
 });
+
+test("qwen35 streaming: tool frame emits toolCall mid-stream", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "qwen35",
+    }),
+  );
+  const text = `<tool_call><function=get_weather><parameter=city>Paris</parameter></function></tool_call>`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "qwen35 tool frame emits toolCall");
+  t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather");
+  t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Paris" });
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<tool_call>"), "open marker must not leak");
+  t.absent(contentJoined.includes("</tool_call>"), "close marker must not leak");
+});
+
+test("qwen35 streaming: marker split across pushes still detected", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "qwen35",
+    }),
+  );
+  const events = pushAll(n, [
+    "<tool_",
+    "call><function=get_weather><parameter=city>Lima</parameter></function></tool_call>",
+  ]);
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "qwen35 frame detected across split marker");
+});
+
+test("gemma4 streaming: tool frame emits toolCall mid-stream", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "gemma4",
+    }),
+  );
+  const text = `<|tool_call>call:get_weather{city:<|"|>Tokyo<|"|>}<tool_call|>`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "gemma4 tool frame emits toolCall");
+  t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather");
+  t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Tokyo" });
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<|tool_call>"), "open marker must not leak");
+  t.absent(contentJoined.includes("<tool_call|>"), "close marker must not leak");
+});
+
+test("gemma4 thought frame: inner emitted as thinkingDelta (captureThinking=true)", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: NONE_CAPS,
+      toolDialect: "gemma4",
+      captureThinking: true,
+    }),
+  );
+  const text = `<|channel>thoughtthinking here<channel|>after`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  t.alike(texts(events, "thinkingDelta"), ["thinking here"]);
+  t.alike(texts(events, "contentDelta"), ["after"]);
+  t.is(n.getAccumulated().thinkingText, "thinking here");
+  t.is(n.getAccumulated().contentText, "after");
+});
+
+test("gemma4 thought frame: silently dropped (captureThinking=false)", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: NONE_CAPS,
+      toolDialect: "gemma4",
+      captureThinking: false,
+    }),
+  );
+  const text = `<|channel>thoughtthinking here<channel|>after`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  t.absent(types(events).includes("thinkingDelta"), "no thinkingDelta when not captured");
+  t.alike(texts(events, "contentDelta"), ["after"]);
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<|channel>thought"), "open marker must not leak");
+  t.absent(contentJoined.includes("<channel|>"), "close marker must not leak");
+  t.absent(contentJoined.includes("thinking here"), "thought inner must be dropped");
+});

From 2310ace28bb5ce6a65e863ca0bb78b69b02028ad Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 07:30:36 +0100
Subject: [PATCH 06/13] fix: harden qwen35 coercion errors and gemma4
 control-char escaping

- qwen35: boolean coercion now throws on non-"true"/"false" values
  ("True" from Python models) instead of silently returning false
- qwen35: integer/number coercion now throws on NaN values
- qwen35: parameter coercion errors caught per-call and surfaced as
  PARSE_ERROR instead of propagating as uncaught exceptions
- gemma4: string-value escaping now covers the full U+0000-U+001F control
  range (regex [\x00-\x1f], each char emitted as a \uXXXX escape) instead
  of only \n and \r
- add 19 new unit tests: typed coercions, error cases, multiple calls,
  unknown-tool and validation errors, hermes-JSON fallback in qwen35 chain,
  bare numerics/booleans, nested objects/arrays, tab and CR round-trips,
  malformed-args PARSE_ERROR in gemma4
---
 .../utils/tools/parsers/gemma4native.ts       |   2 +-
 .../sdk/server/utils/tools/parsers/qwen35.ts  |  23 +-
 packages/sdk/test/unit/tool-parser.test.ts    | 266 ++++++++++++++++++
 3 files changed, 286 insertions(+), 5 deletions(-)

diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
index 248bca76f5..ab6b17aa71 100644
--- a/packages/sdk/server/utils/tools/parsers/gemma4native.ts
+++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
@@ -18,7 +18,7 @@ function gemmaArgsToJson(argsRaw: string): string {
     .map((part, i) =>
       i % 2 === 0
         ? part.replace(/([{,]\s*)([A-Za-z_]\w*)\s*:/g, '$1"$2":')
-        : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n").replace(/\r/g, "\\r") + '"',
+        : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/[\x00-\x1f]/g, (c) => `\\u${c.charCodeAt(0).toString(16).padStart(4, "0")}`) + '"',
     )
     .join("");
 }
diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
index 1ec49a5a03..4ccf5f6e21 100644
--- a/packages/sdk/server/utils/tools/parsers/qwen35.ts
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -15,10 +15,15 @@ function coerceParamValue(
   if (!schema?.type) return trimmed;
   switch (schema.type) {
     case "number":
-    case "integer":
-      return Number(trimmed);
+    case "integer": {
+      const n = Number(trimmed);
+      if (Number.isNaN(n)) throw new Error(`invalid numeric value: "${trimmed}"`);
+      return n;
+    }
     case "boolean":
-      return trimmed === "true";
+      if (trimmed === "true") return true;
+      if (trimmed === "false") return false;
+      throw new Error(`invalid boolean value: "${trimmed}"`);
     case "array":
     case "object":
       try {
@@ -94,9 +99,19 @@ export function parseQwen35Format(text: string, tools: Tool[]): ParserResult {
     const args: Record<string, unknown> = {};
     const paramRegex = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/gi;
     let pm: RegExpExecArray | null;
+    let coercionError: string | undefined;
     while ((pm = paramRegex.exec(paramsBlock)) !== null) {
       const paramName = pm[1]!.trim();
-      args[paramName] = coerceParamValue(pm[2]!, schemaProperties[paramName]);
+      try {
+        args[paramName] = coerceParamValue(pm[2]!, schemaProperties[paramName]);
+      } catch (err) {
+        coercionError = err instanceof Error ? err.message : String(err);
+        break;
+      }
+    }
+    if (coercionError !== undefined) {
+      errors.push({ code: "PARSE_ERROR", message: coercionError, raw: inner });
+      continue;
     }
 
     const validation = validateToolArguments(name, args, tools);
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index a5a3eb2f2a..5131c26efa 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -717,3 +717,269 @@ test("parseToolCalls(dialect=gemma4): parses Gemma4 native format", (t) => {
   t.is(toolCalls[0]?.name, "get_weather");
   t.alike(toolCalls[0]?.arguments, { city: "Berlin" });
 });
+
+// --- qwen35 coercion and error-surface tests ---
+
+test("parseQwen35Format: integer param is coerced to number", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, label: { type: "string" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>42</parameter><parameter=label>hello</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.arguments?.count, 42);
+  t.is(result.toolCalls[0]?.arguments?.label, "hello");
+});
+
+test("parseQwen35Format: boolean param 'true' coerces to true", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>true</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.flag, true);
+});
+
+test("parseQwen35Format: boolean param 'false' coerces to false", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>false</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.flag, false);
+});
+
+test("parseQwen35Format: boolean param 'True' (uppercase) surfaces PARSE_ERROR", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>True</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: integer param 'not-a-number' surfaces PARSE_ERROR", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>not-a-number</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: array param is parsed from JSON", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, tags: { type: "array" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=tags>["a","b","c"]</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.tags, ["a", "b", "c"]);
+});
+
+test("parseQwen35Format: multiple tool calls are all parsed", (t) => {
+  const text = `<tool_call><function=get_weather><parameter=city>Paris</parameter></function></tool_call>
+<tool_call><function=get_horoscope><parameter=sign>Aries</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 2);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.is(result.toolCalls[1]?.name, "get_horoscope");
+});
+
+test("parseQwen35Format: unknown tool name surfaces UNKNOWN_TOOL", (t) => {
+  const text = `<tool_call><function=unknown_fn><parameter=x>1</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "UNKNOWN_TOOL");
+});
+
+test("parseQwen35Format: missing required param surfaces VALIDATION_ERROR", (t) => {
+  const text = `<tool_call><function=get_weather><parameter=country>FR</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "VALIDATION_ERROR");
+});
+
+test("parseToolCalls(dialect=qwen35): JSON inside tool_call falls through to hermes parser", (t) => {
+  const text = `<tool_call>
+{"name": "get_weather", "arguments": {"city": "Seoul"}}
+</tool_call>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35");
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Seoul" });
+});
+
+// --- gemma4 structural and error-surface tests ---
+
+test("parseGemma4NativeFormat: bare numeric arg is parsed as number", (t) => {
+  const numTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: { type: "object", properties: { count: { type: "integer" } }, required: ["count"] },
+  };
+  const text = `<|tool_call>call:typed{count:7}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [numTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.count, 7);
+});
+
+test("parseGemma4NativeFormat: bare boolean arg is parsed as boolean", (t) => {
+  const boolTool: Tool = {
+    type: "function",
+    name: "toggle",
+    description: "toggle",
+    parameters: { type: "object", properties: { enabled: { type: "boolean" } }, required: ["enabled"] },
+  };
+  const text = `<|tool_call>call:toggle{enabled:true}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [boolTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.enabled, true);
+});
+
+test("parseGemma4NativeFormat: nested object arg is parsed correctly", (t) => {
+  const searchTool: Tool = {
+    type: "function",
+    name: "search",
+    description: "search",
+    parameters: {
+      type: "object",
+      properties: { query: { type: "string" }, filters: { type: "object" } },
+      required: ["query"],
+    },
+  };
+  const text = `<|tool_call>call:search{query:<|"|>test<|"|>,filters:{active:true,limit:10}}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [searchTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.filters, { active: true, limit: 10 });
+  t.is(result.toolCalls[0]?.arguments?.query, "test");
+});
+
+test("parseGemma4NativeFormat: nested array arg is parsed correctly", (t) => {
+  const arrayTool: Tool = {
+    type: "function",
+    name: "get_weather",
+    description: "weather",
+    parameters: {
+      type: "object",
+      properties: { city: { type: "string" }, ids: { type: "array" } },
+      required: ["city"],
+    },
+  };
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,ids:[1,2,3]}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [arrayTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.ids, [1, 2, 3]);
+});
+
+test("parseGemma4NativeFormat: tab char in string value round-trips correctly", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>col1\tcol2<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.city, "col1\tcol2");
+});
+
+test("parseGemma4NativeFormat: CR char in string value round-trips correctly", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>line1\rline2<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.city, "line1\rline2");
+});
+
+test("parseGemma4NativeFormat: multiple tool calls are all parsed", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>London<|"|>}<tool_call|>
+<|tool_call>call:get_horoscope{sign:<|"|>Leo<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 2);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.is(result.toolCalls[1]?.name, "get_horoscope");
+});
+
+test("parseGemma4NativeFormat: unknown tool name surfaces UNKNOWN_TOOL", (t) => {
+  const text = `<|tool_call>call:unknown_fn{x:<|"|>y<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "UNKNOWN_TOOL");
+});
+
+test("parseGemma4NativeFormat: malformed args (trailing comma) surface PARSE_ERROR", (t) => {
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});

From 85beb8416c92d5b8bc41d4986e133babba0949a8 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 07:33:09 +0100
Subject: [PATCH 07/13] fix: align qwen35 coercion error handling with
 pythonic/hermes pattern

Wrap the full parameter extraction block in a single try/catch instead
of an inner try/catch inside the while loop. Matches the convention used
by parsePythonicFormat and parseHermesFormat.
---
 .../sdk/server/utils/tools/parsers/qwen35.ts  | 21 +++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
index 4ccf5f6e21..486c76ec5a 100644
--- a/packages/sdk/server/utils/tools/parsers/qwen35.ts
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -97,20 +97,19 @@ export function parseQwen35Format(text: string, tools: Tool[]): ParserResult {
     const schemaProperties = tool?.parameters?.properties ?? {};
 
     const args: Record<string, unknown> = {};
-    const paramRegex = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/gi;
-    let pm: RegExpExecArray | null;
-    let coercionError: string | undefined;
-    while ((pm = paramRegex.exec(paramsBlock)) !== null) {
-      const paramName = pm[1]!.trim();
-      try {
+    let parseError: string | undefined;
+    try {
+      const paramRegex = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/gi;
+      let pm: RegExpExecArray | null;
+      while ((pm = paramRegex.exec(paramsBlock)) !== null) {
+        const paramName = pm[1]!.trim();
         args[paramName] = coerceParamValue(pm[2]!, schemaProperties[paramName]);
-      } catch (err) {
-        coercionError = err instanceof Error ? err.message : String(err);
-        break;
       }
+    } catch (err) {
+      parseError = err instanceof Error ? err.message : String(err);
     }
-    if (coercionError !== undefined) {
-      errors.push({ code: "PARSE_ERROR", message: coercionError, raw: inner });
+    if (parseError !== undefined) {
+      errors.push({ code: "PARSE_ERROR", message: parseError, raw: inner });
       continue;
     }
 

From cfa382060219c4a44ddd78d35b532f5f80193930 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 07:47:50 +0100
Subject: [PATCH 08/13] test: fix incorrect comments in dialect negative-case
 tests

- gemma3 comment: removed misleading "Q4 quantization suffix" framing;
  the real concern is a Gemma 3 4B model not being detected as Gemma 4
- gemma-40b comment: corrected factually wrong "4 billion params" description;
  the actual mechanism is the trailing '0' digit blocking the gemma4 lookahead
---
 packages/sdk/test/unit/tool-parser.test.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index 5131c26efa..eec1492cba 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -270,7 +270,7 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     [undefined, "/cache/abc_Llama-3.3-70B-Instruct-Tool-Calling.gguf"],
     [undefined, ""],
     ["", ""],
-    // Gemma3 with Q4 quantization suffix must not be mistaken for Gemma4 model generation
+    // Gemma 3 models (including 4B size variant) must not be detected as Gemma 4
     [undefined, "/cache/abc_gemma3-Q4_K_M.gguf"],
     ["GEMMA3_Q4", "/Users/x/.qvac/models/abc_gemma-3-4b-q4_k_m.gguf"],
     // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5)
@@ -282,7 +282,7 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     // Qwen3 60B must not be mistaken for Qwen3.6 (digit after 6, not a letter)
     [undefined, "/cache/abc_Qwen3-60B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_60B_INST", "/Users/x/.qvac/models/abc_qwen3-60b-instruct.gguf"],
-    // Gemma-4 billion params (generation 3, 4B size) must not be mistaken for Gemma 4 generation
+    // gemma-40b contains 'gemma-4' as a substring but the trailing '0' (digit) blocks the gemma4 lookahead
     [undefined, "/cache/abc_gemma-40b-Q4_K_M.gguf"],
   ];
 

From 35a9ebff986eb6c6955e0e632dbe756dd2e13b93 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 07:53:21 +0100
Subject: [PATCH 09/13] test: remove confusing dialect negative-case comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'digit after 5, not a letter' and 'digit after 6, not a letter' were
both wrong — the negative lookahead (?![a-z0-9]) blocks any alphanumeric
character, not just digits. Remove rather than rephrase.
---
 packages/sdk/test/unit/tool-parser.test.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index eec1492cba..f35c04ea77 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -276,10 +276,8 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5)
     [undefined, "/cache/abc_Qwen3-5B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_5B_INST", "/Users/x/.qvac/models/abc_qwen3-5b-instruct.gguf"],
-    // Qwen3 50B must not be mistaken for Qwen3.5 (digit after 5, not a letter)
     [undefined, "/cache/abc_Qwen3-50B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_50B_INST", "/Users/x/.qvac/models/abc_qwen3-50b-instruct.gguf"],
-    // Qwen3 60B must not be mistaken for Qwen3.6 (digit after 6, not a letter)
     [undefined, "/cache/abc_Qwen3-60B-Instruct-Q4_K_M.gguf"],
     ["QWEN3_60B_INST", "/Users/x/.qvac/models/abc_qwen3-60b-instruct.gguf"],
     // gemma-40b contains 'gemma-4' as a substring but the trailing '0' (digit) blocks the gemma4 lookahead

From b3a390dfc888818608afc25d5ffd79fe86243e05 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 08:11:18 +0100
Subject: [PATCH 10/13] fix: reject non-integer floats and malformed
 array/object params in qwen35 parser

integer schema type now rejects non-integer floats (e.g. 1.5) via
Number.isInteger check. array/object schema types now propagate PARSE_ERROR
on JSON.parse failure instead of silently falling back to the raw string.

Add regression tests for both cases.
---
 .../sdk/server/utils/tools/parsers/qwen35.ts  | 14 ++---
 packages/sdk/test/unit/tool-parser.test.ts    | 57 +++++++++++++++++++
 2 files changed, 64 insertions(+), 7 deletions(-)

diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
index 486c76ec5a..28af36ccdb 100644
--- a/packages/sdk/server/utils/tools/parsers/qwen35.ts
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -14,23 +14,23 @@ function coerceParamValue(
   const trimmed = raw.trim();
   if (!schema?.type) return trimmed;
   switch (schema.type) {
-    case "number":
-    case "integer": {
+    case "number": {
       const n = Number(trimmed);
       if (Number.isNaN(n)) throw new Error(`invalid numeric value: "${trimmed}"`);
       return n;
     }
+    case "integer": {
+      const n = Number(trimmed);
+      if (Number.isNaN(n) || !Number.isInteger(n)) throw new Error(`invalid integer value: "${trimmed}"`);
+      return n;
+    }
     case "boolean":
       if (trimmed === "true") return true;
       if (trimmed === "false") return false;
       throw new Error(`invalid boolean value: "${trimmed}"`);
     case "array":
     case "object":
-      try {
-        return JSON.parse(trimmed);
-      } catch {
-        return trimmed;
-      }
+      return JSON.parse(trimmed);
     default:
       return trimmed;
   }
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index f35c04ea77..9171afa201 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -812,6 +812,63 @@ test("parseQwen35Format: integer param 'not-a-number' surfaces PARSE_ERROR", (t)
   t.is(result.errors[0]?.code, "PARSE_ERROR");
 });
 
+test("parseQwen35Format: integer param '1.5' (non-integer) surfaces PARSE_ERROR", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1.5</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: malformed array param surfaces PARSE_ERROR (no raw-string fallback)", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, tags: { type: "array" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=tags>[1,2</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: malformed object param surfaces PARSE_ERROR (no raw-string fallback)", (t) => {
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, meta: { type: "object" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=meta>{bad json</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
 test("parseQwen35Format: array param is parsed from JSON", (t) => {
   const typedTool: Tool = {
     type: "function",

From 6ec9e74f8c5e27e8cb0c99dc1a9ac8934235c025 Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 13:52:11 +0100
Subject: [PATCH 11/13] fix: expose reasoning_budget as boolean in CLI,
 transform to -1|0 for SDK
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SDKGenerationParams.reasoning_budget changes from -1|0 (SDK-internal
representation) to boolean (true = keep reasoning on, false = disable).
sdkCompletion now maps true→-1 and false→0 before forwarding to the SDK.
extractGenerationParams parses incoming boolean reasoning_budget from the
request body and ignores non-boolean values. Tests added for the true, false,
and non-boolean paths.
---
 .../cli/src/serve/adapters/openai/translate.ts  |  2 ++
 packages/cli/src/serve/core/sdk.ts              |  9 +++++++--
 packages/cli/test/translate.test.ts             | 17 +++++++++++++++++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/packages/cli/src/serve/adapters/openai/translate.ts b/packages/cli/src/serve/adapters/openai/translate.ts
index 74d910b8eb..618944ca68 100644
--- a/packages/cli/src/serve/adapters/openai/translate.ts
+++ b/packages/cli/src/serve/adapters/openai/translate.ts
@@ -146,6 +146,8 @@ export function extractGenerationParams (body: Record<string, unknown>): SDKGene
   if (typeof body['max_tokens'] === 'number') params.predict = body['max_tokens']
   if (typeof body['max_completion_tokens'] === 'number') params.predict = body['max_completion_tokens']
 
+  if (typeof body['reasoning_budget'] === 'boolean') params.reasoning_budget = body['reasoning_budget']
+
   return Object.keys(params).length > 0 ? params : undefined
 }
 
diff --git a/packages/cli/src/serve/core/sdk.ts b/packages/cli/src/serve/core/sdk.ts
index faac11614e..2327df9e99 100644
--- a/packages/cli/src/serve/core/sdk.ts
+++ b/packages/cli/src/serve/core/sdk.ts
@@ -11,7 +11,7 @@ export interface SDKGenerationParams {
   frequency_penalty?: number
   presence_penalty?: number
   repeat_penalty?: number
-  reasoning_budget?: -1 | 0
+  reasoning_budget?: boolean
 }
 
 export type SDKResponseFormat =
@@ -163,7 +163,12 @@ export async function sdkCompletion (opts: {
     params['tools'] = opts.tools
   }
   if (opts.generationParams) {
-    params['generationParams'] = opts.generationParams
+    const { reasoning_budget, ...rest } = opts.generationParams
+    const sdkGenParams: Record<string, unknown> = { ...rest }
+    if (reasoning_budget !== undefined) {
+      sdkGenParams['reasoning_budget'] = reasoning_budget ? -1 : 0
+    }
+    params['generationParams'] = sdkGenParams
   }
   if (opts.responseFormat) {
     params['responseFormat'] = opts.responseFormat
diff --git a/packages/cli/test/translate.test.ts b/packages/cli/test/translate.test.ts
index 5a1c1f8bcb..eb374a624a 100644
--- a/packages/cli/test/translate.test.ts
+++ b/packages/cli/test/translate.test.ts
@@ -344,6 +344,23 @@ describe('extractGenerationParams', () => {
     assert.equal(params.presence_penalty, 0.1)
   })
 
+  it('extracts reasoning_budget true', () => {
+    const params = extractGenerationParams({ reasoning_budget: true })
+    assert.ok(params)
+    assert.equal(params.reasoning_budget, true)
+  })
+
+  it('extracts reasoning_budget false', () => {
+    const params = extractGenerationParams({ reasoning_budget: false })
+    assert.ok(params)
+    assert.equal(params.reasoning_budget, false)
+  })
+
+  it('ignores non-boolean reasoning_budget', () => {
+    const params = extractGenerationParams({ reasoning_budget: -1 })
+    assert.equal(params, undefined)
+  })
+
   it('ignores non-number values', () => {
     const params = extractGenerationParams({ temperature: 'hot', max_tokens: '100' })
     assert.equal(params, undefined)

From 509f48e05e439cbb3a6f2b3aa5ce4c038b3c8bef Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 13:52:27 +0100
Subject: [PATCH 12/13] feat: wire toolDialect and resourceKey through
 ToolsExecutor and createToolsTest

ToolsExecutor.generic now reads toolDialect (forwarded to completion()) and
resourceKey (selects which loaded model to use) from test params. The
createToolsTest helper accepts both as optional options, so dialect-specific
e2e test definitions can be added once the model constants are available
from update-models.
---
 .../tests-qvac/tests/shared/executors/tools-executor.ts    | 6 +++++-
 packages/sdk/tests-qvac/tests/tools-tests.ts               | 7 ++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
index be55a9a0ef..35cf887a1f 100644
--- a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
+++ b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
@@ -1,4 +1,5 @@
 import { completion } from "@qvac/sdk";
+import type { ToolDialect } from "@qvac/sdk";
 import {
   ValidationHelpers,
   type TestResult,
@@ -24,9 +25,11 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
         parameters: Record<string, unknown>;
       }>;
       toolsMode?: "static" | "dynamic";
+      toolDialect?: ToolDialect;
+      resourceKey?: string;
       stream?: boolean;
     };
-    const resourceKey = p.toolsMode === "dynamic" ? "tools-dynamic" : "tools";
+    const resourceKey = p.resourceKey ?? (p.toolsMode === "dynamic" ? "tools-dynamic" : "tools");
     const toolsModelId = await this.resources.ensureLoaded(resourceKey);
 
     try {
@@ -35,6 +38,7 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
         history: p.history,
         tools: p.tools as never,
         stream: p.stream ?? false,
+        ...(p.toolDialect && { toolDialect: p.toolDialect }),
       });
 
       const text = await result.text;
diff --git a/packages/sdk/tests-qvac/tests/tools-tests.ts b/packages/sdk/tests-qvac/tests/tools-tests.ts
index 271c3d62a5..72bac4b335 100644
--- a/packages/sdk/tests-qvac/tests/tools-tests.ts
+++ b/packages/sdk/tests-qvac/tests/tools-tests.ts
@@ -1,5 +1,6 @@
 // Tools/Function calling test definitions
 import type { TestDefinition } from "@tetherto/qvac-test-suite";
+import type { ToolDialect } from "@qvac/sdk";
 
 // Helper for creating tools tests
 const createToolsTest = (
@@ -21,6 +22,8 @@ const createToolsTest = (
       expectedType: "string" | "number" | "array";
     };
     toolsMode?: "static" | "dynamic";
+    toolDialect?: ToolDialect;
+    resourceKey?: string;
     suites?: string[];
   } = {},
 ): TestDefinition => {
@@ -29,7 +32,7 @@ const createToolsTest = (
     expectedType: "string" as const,
   };
   const dependency =
-    options.toolsMode === "dynamic" ? "tools-dynamic" : "tools";
+    options.resourceKey ?? (options.toolsMode === "dynamic" ? "tools-dynamic" : "tools");
   return {
     testId,
     params: {
@@ -37,6 +40,8 @@ const createToolsTest = (
       tools,
       stream: false,
       ...(options.toolsMode && { toolsMode: options.toolsMode }),
+      ...(options.toolDialect && { toolDialect: options.toolDialect }),
+      ...(options.resourceKey && { resourceKey: options.resourceKey }),
     },
     expectation,
     ...(options.suites && { suites: options.suites }),

From 1c9f0853eb42a622ed631c39ca1a578143ecf14e Mon Sep 17 00:00:00 2001
From: Ridwan Taiwo <donriddo@gmail.com>
Date: Tue, 12 May 2026 14:55:09 +0100
Subject: [PATCH 13/13] fix: reject empty numeric params in qwen35, allow
 hyphens in gemma4 tool names, add qwen35 to default parser chain

- coerceParamValue: reject empty/whitespace-only values for both number and
  integer types before calling Number(); Number("") === 0, so an empty
  parameter was silently coerced to 0
- gemma4native callRegex and bare-key quoting regex: broaden [A-Za-z_]\w* to
  [A-Za-z_][\w-]* so hyphenated tool names (and param keys) are matched instead
  of returning matched=false and leaking raw frame markers as contentDelta
- pickFormatParsers default chain: insert parseQwen35Format ahead of parseHermesFormat
  so raw Qwen XML payloads are recovered when the model-name heuristic misses
- regression tests for all three cases
---
 packages/sdk/server/utils/tools/parser.ts     |  3 +
 .../utils/tools/parsers/gemma4native.ts       |  4 +-
 .../sdk/server/utils/tools/parsers/qwen35.ts  |  2 +
 packages/sdk/test/unit/tool-parser.test.ts    | 67 +++++++++++++++++++
 4 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/packages/sdk/server/utils/tools/parser.ts b/packages/sdk/server/utils/tools/parser.ts
index 5a49688d3d..065d6e3ce6 100644
--- a/packages/sdk/server/utils/tools/parser.ts
+++ b/packages/sdk/server/utils/tools/parser.ts
@@ -40,11 +40,14 @@ function pickFormatParsers(
       // Gemma4 first: `<|tool_call>` is uniquely distinctive and can't
       // false-match other dialects.
       // Harmony next: `to=functions.` is also uniquely Harmony.
+      // Qwen35 before Hermes: defers to Hermes when JSON is inside <tool_call>,
+      // so the XML path is recovered without breaking Hermes-JSON payloads.
       // Pythonic last: its bare `[name(...)]` form can match payloads that
       // look like other dialects.
       return [
         parseGemma4NativeFormat,
         parseHarmonyFormat,
+        parseQwen35Format,
         parseHermesFormat,
         parseGemmaFormat,
         parseLlamacppFormat,
diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
index ab6b17aa71..6b3cc9215a 100644
--- a/packages/sdk/server/utils/tools/parsers/gemma4native.ts
+++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
@@ -17,7 +17,7 @@ function gemmaArgsToJson(argsRaw: string): string {
   return parts
     .map((part, i) =>
       i % 2 === 0
-        ? part.replace(/([{,]\s*)([A-Za-z_]\w*)\s*:/g, '$1"$2":')
+        ? part.replace(/([{,]\s*)([A-Za-z_][\w-]*)\s*:/g, '$1"$2":')
         : '"' + part.replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/[\x00-\x1f]/g, (c) => `\\u${c.charCodeAt(0).toString(16).padStart(4, "0")}`) + '"',
     )
     .join("");
@@ -36,7 +36,7 @@ export function parseGemma4NativeFormat(text: string, tools: Tool[]): ParserResu
     return { matched: false, toolCalls, errors };
   }
 
-  const callRegex = /<\|tool_call>call:([A-Za-z_]\w*)\{([\s\S]*?)\}<tool_call\|>/g;
+  const callRegex = /<\|tool_call>call:([A-Za-z_][\w-]*)\{([\s\S]*?)\}<tool_call\|>/g;
   const matches = Array.from(text.matchAll(callRegex));
 
   if (matches.length === 0) return { matched: false, toolCalls, errors };
diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
index 28af36ccdb..ac417e2e88 100644
--- a/packages/sdk/server/utils/tools/parsers/qwen35.ts
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -15,11 +15,13 @@ function coerceParamValue(
   if (!schema?.type) return trimmed;
   switch (schema.type) {
     case "number": {
+      if (trimmed.length === 0) throw new Error(`invalid numeric value: ""`);
       const n = Number(trimmed);
       if (Number.isNaN(n)) throw new Error(`invalid numeric value: "${trimmed}"`);
       return n;
     }
     case "integer": {
+      if (trimmed.length === 0) throw new Error(`invalid integer value: ""`);
       const n = Number(trimmed);
       if (Number.isNaN(n) || !Number.isInteger(n)) throw new Error(`invalid integer value: "${trimmed}"`);
       return n;
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index 9171afa201..1c03a54f2a 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -1038,3 +1038,70 @@ test("parseGemma4NativeFormat: malformed args (trailing comma) surface PARSE_ERR
   t.is(result.errors.length, 1);
   t.is(result.errors[0]?.code, "PARSE_ERROR");
 });
+
+test("parseQwen35Format: empty integer param surfaces PARSE_ERROR (not 0)", (t) => { // regression test: Number("") evaluates to 0, so an empty value must be rejected rather than coerced
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } }, // a declared type makes the parser coerce the raw text instead of returning it as a string
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count></parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true); // expected: the payload still counts as this dialect even though the argument is invalid
+  t.is(result.toolCalls.length, 0); // expected: no tool call is produced with count silently set to 0
+  t.is(result.errors.length, 1); // expected: exactly one error is reported for the single bad call
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: whitespace-only number param surfaces PARSE_ERROR (not 0)", (t) => { // regression test: Number("   ") also evaluates to 0, so blank text must be rejected like the empty string
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { score: { type: "number" } }, // exercises the "number" branch of the coercion (the sibling test covers "integer")
+      required: ["score"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=score>   </parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true); // expected: the payload still counts as this dialect even though the argument is invalid
+  t.is(result.toolCalls.length, 0); // expected: no tool call is produced with score silently set to 0
+  t.is(result.errors.length, 1); // expected: exactly one error is reported for the single bad call
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseGemma4NativeFormat: hyphenated tool name parses correctly", (t) => { // covers the widened name pattern ([\w-]* instead of \w*) in the call regex
+  const hyphenTool: Tool = {
+    type: "function",
+    name: "get-weather", // the hyphen is the point of the test: the previous \w* pattern could not match this name
+    description: "Get current weather",
+    parameters: {
+      type: "object",
+      properties: { city: { type: "string" } },
+      required: ["city"],
+    },
+  };
+  const text = `<|tool_call>call:get-weather{city:<|"|>Tokyo<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [hyphenTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get-weather"); // expected: the full hyphenated name is captured, not truncated at the hyphen
+  t.alike(result.toolCalls[0]?.arguments, { city: "Tokyo" }); // expected: the bare key and the <|"|>-quoted value come back as a plain object
+});
+
+test("parseToolCalls(default): Qwen3.5 XML format is recovered without explicit dialect", (t) => { // covers the Qwen3.5 parser being part of the default parser list
+  const text = `<tool_call><function=get_weather><parameter=city>Berlin</parameter></function></tool_call>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools); // no dialect is passed; NOTE(review): pythonicTools is defined elsewhere in this file, presumably declaring get_weather with a city parameter; confirm
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Berlin" }); // expected: the raw parameter text comes back as a string value
+});