diff --git a/packages/cli/src/serve/adapters/openai/translate.ts b/packages/cli/src/serve/adapters/openai/translate.ts
index 74d910b8eb..618944ca68 100644
--- a/packages/cli/src/serve/adapters/openai/translate.ts
+++ b/packages/cli/src/serve/adapters/openai/translate.ts
@@ -146,6 +146,8 @@ export function extractGenerationParams (body: Record<string, unknown>): SDKGene
   if (typeof body['max_tokens'] === 'number') params.predict = body['max_tokens']
   if (typeof body['max_completion_tokens'] === 'number') params.predict = body['max_completion_tokens']
 
+  if (typeof body['reasoning_budget'] === 'boolean') params.reasoning_budget = body['reasoning_budget']
+
   return Object.keys(params).length > 0 ? params : undefined
 }
 
diff --git a/packages/cli/src/serve/core/sdk.ts b/packages/cli/src/serve/core/sdk.ts
index b6db7823a8..2327df9e99 100644
--- a/packages/cli/src/serve/core/sdk.ts
+++ b/packages/cli/src/serve/core/sdk.ts
@@ -11,6 +11,7 @@ export interface SDKGenerationParams {
   frequency_penalty?: number
   presence_penalty?: number
   repeat_penalty?: number
+  reasoning_budget?: boolean
 }
 
 export type SDKResponseFormat =
@@ -162,7 +163,12 @@ export async function sdkCompletion (opts: {
     params['tools'] = opts.tools
   }
   if (opts.generationParams) {
-    params['generationParams'] = opts.generationParams
+    const { reasoning_budget, ...rest } = opts.generationParams
+    const sdkGenParams: Record<string, unknown> = { ...rest }
+    if (reasoning_budget !== undefined) {
+      sdkGenParams['reasoning_budget'] = reasoning_budget ? -1 : 0
+    }
+    params['generationParams'] = sdkGenParams
   }
   if (opts.responseFormat) {
     params['responseFormat'] = opts.responseFormat
diff --git a/packages/cli/test/translate.test.ts b/packages/cli/test/translate.test.ts
index 5a1c1f8bcb..eb374a624a 100644
--- a/packages/cli/test/translate.test.ts
+++ b/packages/cli/test/translate.test.ts
@@ -344,6 +344,23 @@ describe('extractGenerationParams', () => {
     assert.equal(params.presence_penalty, 0.1)
   })
 
+  it('extracts reasoning_budget true', () => {
+    const params = extractGenerationParams({ reasoning_budget: true })
+    assert.ok(params)
+    assert.equal(params.reasoning_budget, true)
+  })
+
+  it('extracts reasoning_budget false', () => {
+    const params = extractGenerationParams({ reasoning_budget: false })
+    assert.ok(params)
+    assert.equal(params.reasoning_budget, false)
+  })
+
+  it('ignores non-boolean reasoning_budget', () => {
+    const params = extractGenerationParams({ reasoning_budget: -1 })
+    assert.equal(params, undefined)
+  })
+
   it('ignores non-number values', () => {
     const params = extractGenerationParams({ temperature: 'hot', max_tokens: '100' })
     assert.equal(params, undefined)
diff --git a/packages/sdk/client/api/completion-stream.ts b/packages/sdk/client/api/completion-stream.ts
index 18a52f0f26..0fe0ef0c1a 100644
--- a/packages/sdk/client/api/completion-stream.ts
+++ b/packages/sdk/client/api/completion-stream.ts
@@ -54,7 +54,7 @@ type CompletionParams = Omit<CompletionClientParams, "tools"> & {
  * @param params.mcp - Optional array of MCP client inputs for tool integration
  * @param params.captureThinking - Best-effort parsing of `<think>` blocks into `thinkingDelta` events; `final.raw.fullText` always preserves the original output
  * @param params.emitRawDeltas - When true, every raw model token is also emitted as a `rawDelta` event
- * @param params.toolDialect - Override the SDK's name-based dialect detection. Use when your model emits a known format (`"hermes"`, `"pythonic"`, or `"json"`) the auto-router doesn't recognise. Drives both streaming frame detection and finalization parsing.
+ * @param params.toolDialect - Override the SDK's name-based dialect detection. Supported values: `"hermes"`, `"pythonic"`, `"json"`, `"harmony"`, `"qwen35"` (Qwen3.5/3.6), `"gemma4"`. Use when the auto-router doesn't recognise your model name. Drives both streaming frame detection and finalization parsing.
  * Common override case: Llama 3.x tool-calling fine-tunes that emit the native pythonic header (`<|start_header_id|>tool_call<|end_header_id|>...<|eot_id|>`).
  * @param params.responseFormat - Optional structured-output constraint applied to the model's output:
  *   - `{ type: "text" }` — no constraint (default behavior)
diff --git a/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts
new file mode 100644
index 0000000000..d9aa1427ab
--- /dev/null
+++ b/packages/sdk/examples/tools/llamacpp-tools-gemma4.ts
@@ -0,0 +1,92 @@
+/**
+ * Gemma4 tool-calling example (native dialect).
+ *
+ * Gemma4 writes tool calls as JS-style literals with custom quote tokens:
+ *   <|tool_call>call:NAME{key:<|"|>val<|"|>,...}<tool_call|>
+ *
+ * Its reasoning ("thinking") output arrives inside <|channel>thought...<channel|>
+ * frames. Those frames are stripped from contentDelta and are forwarded as
+ * thinkingDelta when captureThinking is true.
+ *
+ * The dialect is picked automatically from the model name/path when the file
+ * name contains "gemma4" or "gemma-4". If a file name is not recognised, pass
+ * toolDialect: "gemma4" to completion() explicitly.
+ *
+ * Usage:
+ *   bun run bare:example dist/examples/tools/llamacpp-tools-gemma4.js <model-url>
+ */
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolCall,
+} from "@qvac/sdk";
+import { tools, mockExecute } from "./shared";
+
+// bartowski's pack tags <eos> as the EOG token (matching the base tokenizer),
+// ensuring generation stops correctly; unsloth's variant maps it differently.
+const GEMMA4_HF =
+  "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
+
+// The first CLI argument, when given, replaces the default model URL.
+const modelSrc = process.argv[2] ?? GEMMA4_HF;
+
+let modelId: string | undefined;
+try {
+  modelId = await loadModel({
+    modelSrc,
+    modelType: "llm",
+    modelConfig: { ctx_size: 4096, tools: true },
+    onProgress: ({ percentage }) =>
+      console.log(`Loading: ${percentage.toFixed(1)}%`),
+  });
+  console.log(`Model loaded: ${modelId}`);
+
+  const systemMessage = {
+    role: "system",
+    content:
+      "You are a helpful assistant that can call tools to look up weather and horoscopes.",
+  };
+  const userMessage = {
+    role: "user",
+    content: "What's the weather in Tokyo and my horoscope for Aquarius?",
+  };
+  const history = [systemMessage, userMessage];
+
+  const result = completion({ modelId, history, stream: true, tools });
+
+  // Drain both streams concurrently: tokens are written to stdout as they
+  // arrive, and each toolCall event is logged when it is received.
+  const printTokens = async (): Promise<void> => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  };
+  const printToolCalls = async (): Promise<void> => {
+    for await (const evt of result.toolCallStream) {
+      if (evt.type !== "toolCall") continue;
+      console.log(
+        `\n-> ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+      );
+    }
+  };
+  await Promise.all([printTokens(), printToolCalls()]);
+
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  console.log("\n\nFinal tool calls:");
+  if (toolCalls.length === 0) {
+    console.log("  (none)");
+  }
+  for (const call of toolCalls) {
+    console.log(`  - ${call.name}(${JSON.stringify(call.arguments)})`);
+    const toolResult = mockExecute(call.name, call.arguments);
+    console.log(`    result: ${toolResult}`);
+  }
+
+  await unloadModel({ modelId, clearStorage: false });
+} catch (error) {
+  console.error("Error:", error);
+  if (modelId) await unloadModel({ modelId, clearStorage: false }).catch(() => {});
+  process.exit(1);
+}
diff --git a/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
new file mode 100644
index 0000000000..7d8c571454
--- /dev/null
+++ b/packages/sdk/examples/tools/llamacpp-tools-qwen35.ts
@@ -0,0 +1,86 @@
+/**
+ * Qwen3.5 tool-calling example.
+ *
+ * Qwen3.5 writes tool calls in a Pythonic-XML format:
+ *   <tool_call><function=NAME><parameter=KEY>VALUE</parameter></function></tool_call>
+ *
+ * The dialect is picked automatically from the model name/path when the model
+ * file contains "qwen3.5", "qwen3-5", "qwen3.6", or "qwen3-6". If a file name
+ * is not recognised, pass toolDialect: "qwen35" explicitly.
+ *
+ * Usage:
+ *   bun run bare:example dist/examples/tools/llamacpp-tools-qwen35.js <model-url>
+ */
+import {
+  completion,
+  loadModel,
+  unloadModel,
+  type ToolCall,
+} from "@qvac/sdk";
+import { tools, mockExecute } from "./shared";
+
+const QWEN35_HF =
+  "https://huggingface.co/unsloth/Qwen3.5-0.8B-GGUF/resolve/main/Qwen3.5-0.8B-Q8_0.gguf";
+
+// The first CLI argument, when given, replaces the default model URL.
+const modelSrc = process.argv[2] ?? QWEN35_HF;
+
+let modelId: string | undefined;
+try {
+  modelId = await loadModel({
+    modelSrc,
+    modelType: "llm",
+    modelConfig: { ctx_size: 4096, tools: true },
+    onProgress: ({ percentage }) =>
+      console.log(`Loading: ${percentage.toFixed(1)}%`),
+  });
+  console.log(`Model loaded: ${modelId}`);
+
+  const systemMessage = {
+    role: "system",
+    content:
+      "You are a helpful assistant that can call tools to look up weather and horoscopes.",
+  };
+  const userMessage = {
+    role: "user",
+    content: "What's the weather in Tokyo and my horoscope for Aquarius?",
+  };
+  const history = [systemMessage, userMessage];
+
+  const result = completion({ modelId, history, stream: true, tools });
+
+  // Drain both streams concurrently: tokens are written to stdout as they
+  // arrive, and each toolCall event is logged when it is received.
+  const printTokens = async (): Promise<void> => {
+    for await (const token of result.tokenStream) {
+      process.stdout.write(token);
+    }
+  };
+  const printToolCalls = async (): Promise<void> => {
+    for await (const evt of result.toolCallStream) {
+      if (evt.type !== "toolCall") continue;
+      console.log(
+        `\n-> ${evt.call.name}(${JSON.stringify(evt.call.arguments)})`,
+      );
+    }
+  };
+  await Promise.all([printTokens(), printToolCalls()]);
+
+  const toolCalls: ToolCall[] = await result.toolCalls;
+
+  console.log("\n\nFinal tool calls:");
+  if (toolCalls.length === 0) {
+    console.log("  (none)");
+  }
+  for (const call of toolCalls) {
+    console.log(`  - ${call.name}(${JSON.stringify(call.arguments)})`);
+    const toolResult = mockExecute(call.name, call.arguments);
+    console.log(`    result: ${toolResult}`);
+  }
+
+  await unloadModel({ modelId, clearStorage: false });
+} catch (error) {
+  console.error("Error:", error);
+  if (modelId) await unloadModel({ modelId, clearStorage: false }).catch(() => {});
+  process.exit(1);
+}
diff --git a/packages/sdk/package.json b/packages/sdk/package.json
index 3eb64d986c..5aab878501 100644
--- a/packages/sdk/package.json
+++ b/packages/sdk/package.json
@@ -177,7 +177,7 @@
     "@qvac/embed-llamacpp": "^0.15.0",
     "@qvac/error": "^0.1.1",
     "@qvac/langdetect-text": "^0.1.2",
-    "@qvac/llm-llamacpp": "^0.18.0",
+    "@qvac/llm-llamacpp": "^0.20.0",
     "@qvac/logging": "^0.1.0",
     "@qvac/ocr-onnx": "^0.4.2",
     "@qvac/rag": "^0.4.4",
diff --git a/packages/sdk/schemas/completion-stream.ts b/packages/sdk/schemas/completion-stream.ts
index e3edc9f1a1..4e75f38b08 100644
--- a/packages/sdk/schemas/completion-stream.ts
+++ b/packages/sdk/schemas/completion-stream.ts
@@ -16,12 +16,16 @@ export {
  * - `"pythonic"`: `[get_weather(city="Tokyo")]` (optionally `<|tool_call_start|>...<|tool_call_end|>`-wrapped)
  * - `"json"`:     `{"name":"get_weather","arguments":{"city":"Tokyo"}}` or `{"tool_calls":[{"name":"...","arguments":{...}}]}`
  * - `"harmony"`:  `<|channel|>commentary to=functions.get_weather <|constrain|>json<|message|>{"city":"Tokyo"}<|call|>`
+ * - `"qwen35"`:   `<tool_call><function=NAME><parameter=KEY>VALUE</parameter></function></tool_call>`
+ * - `"gemma4"`:   `<|tool_call>call:NAME{key:<|"|>val<|"|>,...}<tool_call|>`
  */
 export const toolDialectSchema = z.enum([
   "hermes",
   "pythonic",
   "json",
   "harmony",
+  "qwen35",
+  "gemma4",
 ]);
 
 export const attachmentSchema = z.object({
@@ -70,6 +74,12 @@ export const generationParamsSchema = z
       .number()
       .optional()
       .describe("Penalty applied to repeated tokens."),
+    reasoning_budget: z
+      .union([z.literal(-1), z.literal(0)])
+      .optional()
+      .describe(
+        "Per-request reasoning channel budget. `-1` keeps the model's reasoning channel on; `0` disables it for this request. Equivalent to the load-time `reasoning_budget` config but scoped to a single `run()` call; the prior value is restored afterwards.",
+      ),
   })
   .strict();
 
diff --git a/packages/sdk/schemas/llamacpp-config.ts b/packages/sdk/schemas/llamacpp-config.ts
index f4f50eabad..9cd70e6877 100644
--- a/packages/sdk/schemas/llamacpp-config.ts
+++ b/packages/sdk/schemas/llamacpp-config.ts
@@ -33,6 +33,7 @@ export const llmConfigBaseSchema = z.object({
       z.number().int().min(1), // positive integer: fixed token count
     ])
     .optional(),
+  /** JS-side only: seeds conversation history. Never forwarded to the C++ addon. */
   system_prompt: z.string().optional(),
   no_mmap: z.boolean().optional(),
   verbosity: verbositySchema.optional(),
@@ -60,6 +61,10 @@ export const llmConfigBaseSchema = z.object({
    * for fast GPU startup.
    */
   openclCacheDir: z.string().optional(),
+  /**
+   * Reasoning channel token budget. `-1` = unrestricted, `0` = disabled.
+   */
+  reasoning_budget: z.union([z.literal(-1), z.literal(0)]).optional(),
   projectionModelSrc: modelSrcInputSchema.optional(),
 });
 
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
index 89d36b7ea0..fd483e2411 100644
--- a/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/plugin.ts
@@ -12,7 +12,6 @@ import {
   ModelType,
   llmConfigBaseSchema,
   ADDON_LLM,
-  TOOLS_MODE,
   type CompletionEvent,
   type CreateModelParams,
   type PluginCapabilities,
@@ -26,6 +25,7 @@ import { expandGGUFIntoShards } from "@/server/utils";
 import { completion } from "@/server/bare/plugins/llamacpp-completion/ops/completion-stream";
 import { finetune } from "@/server/bare/plugins/llamacpp-completion/ops/finetune";
 import { translate } from "@/server/bare/ops/translate";
+import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform";
 import { attachModelExecutionMs } from "@/profiling/model-execution";
 import { getModelConfig } from "@/server/bare/registry/model-registry";
 import { createCompletionNormalizer } from "@/server/utils/completion-normalizer";
@@ -33,44 +33,6 @@ import { detectToolDialect } from "@/server/utils/tool-integration";
 import { getRequestRegistry } from "@/server/bare/runtime";
 import { generateServerRequestId } from "@/server/bare/runtime/request-id";
 
-function transformLlmConfig(llmConfig: LlmConfig) {
-  const transformed = JSON.parse(
-    JSON.stringify(llmConfig, (key: string, v: unknown) =>
-      key === "modelType"
-        ? undefined
-        : key === "stop_sequences"
-          ? Array.isArray(v)
-            ? v.join(", ")
-            : v
-          : typeof v === "number" || typeof v === "boolean"
-            ? String(v)
-            : v,
-    ).replace(
-      /"([a-z][A-Za-z]*)":/g,
-      (_, key: string) =>
-        `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`,
-    ),
-  ) as Record<string, string>;
-
-  if ("stop_sequences" in transformed) {
-    transformed["reverse_prompt"] = transformed["stop_sequences"];
-    delete transformed["stop_sequences"];
-  }
-
-  if ("opencl_cache_dir" in transformed) {
-    transformed["openclCacheDir"] = transformed["opencl_cache_dir"];
-    delete transformed["opencl_cache_dir"];
-  }
-
-  if ("tools_mode" in transformed) {
-    if (transformed["tools_mode"] === TOOLS_MODE.dynamic) {
-      transformed["tools_compact"] = "true";
-    }
-    delete transformed["tools_mode"];
-  }
-
-  return transformed;
-}
 
 function createLlmModel(
   modelId: string,
diff --git a/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts
new file mode 100644
index 0000000000..9c4dd358ca
--- /dev/null
+++ b/packages/sdk/server/bare/plugins/llamacpp-completion/transform.ts
@@ -0,0 +1,60 @@
+import { TOOLS_MODE, type LlmConfig } from "@/schemas";
+
+/**
+ * `JSON.stringify` replacer that shapes config values for the C++ addon.
+ *
+ * Drops the JS-only `modelType` and `system_prompt` fields, joins a
+ * `stop_sequences` array into one `", "`-separated string, and turns numbers
+ * and booleans into their string form. Every other value passes through.
+ */
+function toAddonValue(key: string, value: unknown): unknown {
+  if (key === "modelType" || key === "system_prompt") return undefined;
+  if (key === "stop_sequences") {
+    return Array.isArray(value) ? value.join(", ") : value;
+  }
+  if (typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+  return value;
+}
+
+/**
+ * Converts an LlmConfig into the flat string-keyed map the C++ addon expects.
+ *
+ * JS-only fields excluded from the output (must NOT be forwarded to the addon):
+ *   - modelType   (schema discriminant, meaningless at C++ level)
+ *   - system_prompt  (JS-side history seeding only; C++ removed --system-prompt in 8189)
+ *
+ * Key adjustments applied after serialization:
+ *   - camelCase keys are rewritten to snake_case
+ *   - `stop_sequences` is renamed to `reverse_prompt`
+ *   - `opencl_cache_dir` is renamed back to `openclCacheDir`
+ *   - `tools_mode` is removed; `TOOLS_MODE.dynamic` sets `tools_compact` to "true"
+ */
+export function transformLlmConfig(llmConfig: LlmConfig) {
+  const serialized = JSON.stringify(llmConfig, toAddonValue);
+  const snakeCased = serialized.replace(
+    /"([a-z][A-Za-z]*)":/g,
+    (_, key: string) =>
+      `"${key.replace(/[A-Z]/g, (l: string) => `_${l.toLowerCase()}`)}":`,
+  );
+  const transformed = JSON.parse(snakeCased) as Record<string, string>;
+
+  if ("stop_sequences" in transformed) {
+    transformed["reverse_prompt"] = transformed["stop_sequences"];
+    delete transformed["stop_sequences"];
+  }
+
+  if ("opencl_cache_dir" in transformed) {
+    transformed["openclCacheDir"] = transformed["opencl_cache_dir"];
+    delete transformed["opencl_cache_dir"];
+  }
+
+  if ("tools_mode" in transformed) {
+    const isDynamic = transformed["tools_mode"] === TOOLS_MODE.dynamic;
+    delete transformed["tools_mode"];
+    if (isDynamic) transformed["tools_compact"] = "true";
+  }
+
+  return transformed;
+}
diff --git a/packages/sdk/server/utils/completion-normalizer.ts b/packages/sdk/server/utils/completion-normalizer.ts
index ccc1c782a8..a91b0fa319 100644
--- a/packages/sdk/server/utils/completion-normalizer.ts
+++ b/packages/sdk/server/utils/completion-normalizer.ts
@@ -71,6 +71,14 @@ const DIALECT_SPECS: Record<Dialect, DialectSpec> = {
       "<|return|>",
     ],
   },
+  qwen35: {
+    // Same <tool_call>…</tool_call> framing as hermes; inner content is XML.
+    toolFrames: [{ open: "<tool_call>", close: "</tool_call>" }],
+  },
+  gemma4: {
+    toolFrames: [{ open: "<|tool_call>", close: "<tool_call|>" }],
+    thinkingFrames: [{ open: "<|channel>thought", close: "<channel|>" }],
+  },
 };
 
 // Capture-gated reasoning marker — the generic `<think>...</think>`
diff --git a/packages/sdk/server/utils/tools/dialect.ts b/packages/sdk/server/utils/tools/dialect.ts
index 11bf614b53..bbbade9228 100644
--- a/packages/sdk/server/utils/tools/dialect.ts
+++ b/packages/sdk/server/utils/tools/dialect.ts
@@ -12,6 +12,8 @@ export function detectToolDialectFromName(
   const basename = path.toLowerCase().split(/[/\\]/).pop() ?? "";
   const tag = `${(name ?? "").toLowerCase()}|${basename}`;
 
+  if (/qwen3[._-]?[56](?![a-z0-9])/.test(tag)) return "qwen35";
+  if (/gemma[-_]?4(?=[^a-z0-9]|$)/.test(tag)) return "gemma4";
   if (/gpt[_-]?oss/.test(tag)) return "harmony";
   if (/lfm[_-]?\d/.test(tag)) return "pythonic";
   return "hermes";
diff --git a/packages/sdk/server/utils/tools/parser.ts b/packages/sdk/server/utils/tools/parser.ts
index 2a685593bf..065d6e3ce6 100644
--- a/packages/sdk/server/utils/tools/parser.ts
+++ b/packages/sdk/server/utils/tools/parser.ts
@@ -11,6 +11,8 @@ import {
 import { parseHermesFormat } from "@/server/utils/tools/parsers/hermes";
 import { parsePythonicFormat } from "@/server/utils/tools/parsers/pythonic";
 import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony";
+import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35";
+import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native";
 
 function pickFormatParsers(
   dialect: ToolDialect | undefined,
@@ -26,13 +28,26 @@ function pickFormatParsers(
       return [parseGemmaFormat, parseLlamacppFormat];
     case "harmony":
       return [parseHarmonyFormat];
+    case "qwen35":
+      // Hermes fallback: Qwen3.5 templates sometimes emit OpenAI-style JSON
+      // when the native XML format fails; Hermes chain recovers those.
+      return [parseQwen35Format, parseHermesFormat];
+    case "gemma4":
+      // No JSON fallback: Gemma4 emits tool calls only in its native
+      // `<|tool_call>call:NAME{...}<tool_call|>` dialect, never as a JSON envelope.
+      return [parseGemma4NativeFormat];
     default:
-      // Harmony first: `to=functions.` is uniquely Harmony and can't
+      // Gemma4 first: `<|tool_call>` is uniquely distinctive and can't
       // false-match other dialects.
+      // Harmony next: `to=functions.` is also uniquely Harmony.
+      // Qwen35 before Hermes: defers to Hermes when JSON is inside <tool_call>,
+      // so the XML path is recovered without breaking Hermes-JSON payloads.
       // Pythonic last: its bare `[name(...)]` form can match payloads that
       // look like other dialects.
       return [
+        parseGemma4NativeFormat,
         parseHarmonyFormat,
+        parseQwen35Format,
         parseHermesFormat,
         parseGemmaFormat,
         parseLlamacppFormat,
diff --git a/packages/sdk/server/utils/tools/parsers/gemma4native.ts b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
new file mode 100644
index 0000000000..6b3cc9215a
--- /dev/null
+++ b/packages/sdk/server/utils/tools/parsers/gemma4native.ts
@@ -0,0 +1,89 @@
+import type { Tool, ToolCall, ToolCallError } from "@/schemas";
+import {
+  generateStableToolCallId,
+  validateToolArguments,
+  type ParserResult,
+} from "@/server/utils/tools/shared";
+
+/**
+ * Rewrites the argument body of a Gemma 4 tool call as JSON text.
+ *
+ * The body uses `<|"|>...<|"|>` in place of `"..."` around string values and
+ * leaves object keys unquoted. It is wrapped in braces and split on the
+ * `<|"|>` token: even-indexed pieces are structure, where bare keys get
+ * quoted; odd-indexed pieces are string contents, which are escaped and
+ * quoted. Handling the pieces separately keeps the key-quoting regex from
+ * matching `, key:` text inside string values.
+ */
+function gemmaArgsToJson(argsRaw: string): string {
+  const pieces = `{${argsRaw}}`.split(/<\|"\|>/);
+  let json = "";
+  pieces.forEach((piece, index) => {
+    if (index % 2 === 0) {
+      json += piece.replace(/([{,]\s*)([A-Za-z_][\w-]*)\s*:/g, '$1"$2":');
+      return;
+    }
+    const escaped = piece
+      .replace(/\\/g, "\\\\")
+      .replace(/"/g, '\\"')
+      .replace(/[\x00-\x1f]/g, (c) => `\\u${c.charCodeAt(0).toString(16).padStart(4, "0")}`);
+    json += `"${escaped}"`;
+  });
+  return json;
+}
+
+/**
+ * Parses tool calls written in Gemma 4's native dialect:
+ *   <|tool_call>call:NAME{key:<|"|>val<|"|>,key2:bareNum,...}<tool_call|>
+ *
+ * Each frame's argument body is converted with `gemmaArgsToJson`, parsed with
+ * `JSON.parse`, and checked with `validateToolArguments`. A frame that fails
+ * either step is recorded in `errors` and skipped.
+ *
+ * @param text - Model output to scan.
+ * @param tools - Tool definitions passed to argument validation.
+ * @returns `matched: false` when `text` contains no complete frame; otherwise
+ *   `matched: true` with the parsed calls and any per-frame errors.
+ */
+export function parseGemma4NativeFormat(text: string, tools: Tool[]): ParserResult {
+  const toolCalls: ToolCall[] = [];
+  const errors: ToolCallError[] = [];
+  const noMatch: ParserResult = { matched: false, toolCalls, errors };
+
+  // Cheap substring test before running the frame regex.
+  if (!text.includes("<|tool_call>")) return noMatch;
+
+  const frames = Array.from(
+    text.matchAll(/<\|tool_call>call:([A-Za-z_][\w-]*)\{([\s\S]*?)\}<tool_call\|>/g),
+  );
+  if (frames.length === 0) return noMatch;
+
+  for (const frame of frames) {
+    const raw = frame[0];
+    const name = frame[1]!;
+    const body = frame[2]!;
+
+    let args: Record<string, unknown>;
+    try {
+      args = JSON.parse(gemmaArgsToJson(body)) as Record<string, unknown>;
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      errors.push({
+        code: "PARSE_ERROR",
+        message: `Failed to parse Gemma 4 tool call arguments: ${reason}`,
+        raw,
+      });
+      continue;
+    }
+
+    const check = validateToolArguments(name, args, tools);
+    if (!check.isValid && check.error) {
+      errors.push({ ...check.error, raw });
+      continue;
+    }
+
+    toolCalls.push({ id: generateStableToolCallId(name, args), name, arguments: args, raw });
+  }
+
+  return { matched: true, toolCalls, errors };
+}
diff --git a/packages/sdk/server/utils/tools/parsers/qwen35.ts b/packages/sdk/server/utils/tools/parsers/qwen35.ts
new file mode 100644
index 0000000000..ac417e2e88
--- /dev/null
+++ b/packages/sdk/server/utils/tools/parsers/qwen35.ts
@@ -0,0 +1,161 @@
+import type { Tool, ToolCall, ToolCallError } from "@/schemas";
+import {
+  generateStableToolCallId,
+  validateToolArguments,
+  type ParserResult,
+} from "@/server/utils/tools/shared";
+
+/**
+ * Coerces the raw text of a `<parameter=...>` element to the type declared
+ * for that parameter in the tool's JSON schema.
+ *
+ * The text is trimmed first. With no declared `type`, or a type not handled
+ * below, the trimmed text is returned unchanged. `array` and `object` values
+ * are decoded with `JSON.parse`.
+ *
+ * @param raw - Text between `<parameter=KEY>` and `</parameter>`.
+ * @param schema - Schema entry for the parameter, when the tool declares one.
+ * @returns The coerced value.
+ * @throws Error for text that is not a finite `number`, an `integer`, or a
+ *   `boolean` literal; `JSON.parse` errors propagate for `array`/`object`.
+ */
+function coerceParamValue(
+  raw: string,
+  schema?: { type?: string },
+): unknown {
+  const trimmed = raw.trim();
+  if (!schema?.type) return trimmed;
+  switch (schema.type) {
+    case "number": {
+      // Number("") is 0, so an empty value must be rejected explicitly.
+      if (trimmed.length === 0) throw new Error(`invalid numeric value: ""`);
+      const n = Number(trimmed);
+      // isFinite rejects NaN and also "Infinity"/"-Infinity", which are not
+      // JSON numbers (JSON.stringify turns them into null).
+      if (!Number.isFinite(n)) throw new Error(`invalid numeric value: "${trimmed}"`);
+      return n;
+    }
+    case "integer": {
+      if (trimmed.length === 0) throw new Error(`invalid integer value: ""`);
+      const n = Number(trimmed);
+      // isInteger is already false for NaN and for +/-Infinity.
+      if (!Number.isInteger(n)) throw new Error(`invalid integer value: "${trimmed}"`);
+      return n;
+    }
+    case "boolean":
+      if (trimmed === "true") return true;
+      if (trimmed === "false") return false;
+      throw new Error(`invalid boolean value: "${trimmed}"`);
+    case "array":
+    case "object":
+      return JSON.parse(trimmed);
+    default:
+      return trimmed;
+  }
+}
+
+/**
+ * Parses the Qwen3.5/3.6 Pythonic-XML tool-call format:
+ *   <tool_call>
+ *   <function=NAME>
+ *   <parameter=KEY>VALUE</parameter>
+ *   </function>
+ *   </tool_call>
+ *
+ * String parameter values are raw text (not JSON-quoted); arrays and objects
+ * are JSON. Values are coerced using the matching tool's schema; parameters
+ * the schema does not describe are kept as trimmed strings.
+ *
+ * @param text - Model output to scan for `<tool_call>` frames.
+ * @param tools - Tool definitions used for coercion and argument validation.
+ * @returns `matched: false` when there is no complete frame, or when no frame
+ *   uses `<function=` and at least one starts with `{` or `[` (JSON payload,
+ *   left for another parser). Otherwise `matched: true` with the parsed calls
+ *   and one error per frame that could not be parsed or validated.
+ */
+export function parseQwen35Format(text: string, tools: Tool[]): ParserResult {
+  const toolCalls: ToolCall[] = [];
+  const errors: ToolCallError[] = [];
+
+  if (!text.includes("<tool_call>")) {
+    return { matched: false, toolCalls, errors };
+  }
+
+  const outerRegex = /<tool_call>([\s\S]*?)<\/tool_call>/g;
+  const outerMatches = Array.from(text.matchAll(outerRegex));
+
+  if (outerMatches.length === 0) return { matched: false, toolCalls, errors };
+
+  // If no match contains XML function syntax, check if this is JSON format
+  // (defer to hermes) or just malformed content (surface as PARSE_ERROR).
+  if (!outerMatches.some((m) => m[1]!.includes("<function="))) {
+    const looksLikeJson = outerMatches.some((m) => {
+      const inner = m[1]!.trim();
+      return inner.startsWith("{") || inner.startsWith("[");
+    });
+    if (looksLikeJson) return { matched: false, toolCalls, errors };
+    return {
+      matched: true,
+      toolCalls,
+      errors: outerMatches.map((m) => ({
+        code: "PARSE_ERROR" as const,
+        message: "Qwen3.5 tool call missing <function=NAME>...</function>",
+        raw: m[1]!.trim(),
+      })),
+    };
+  }
+
+  for (const outerMatch of outerMatches) {
+    const inner = outerMatch[1]!.trim();
+
+    const fnMatch = /<function=([^>\s]+)\s*>([\s\S]*?)<\/function>/i.exec(
+      inner,
+    );
+    if (!fnMatch) {
+      errors.push({
+        code: "PARSE_ERROR",
+        message: "Qwen3.5 tool call missing <function=NAME>...</function>",
+        raw: inner,
+      });
+      continue;
+    }
+
+    const name = fnMatch[1]!.trim();
+    const paramsBlock = fnMatch[2]!;
+    const tool = tools.find((t) => t.name === name);
+    const schemaProperties = tool?.parameters?.properties ?? {};
+
+    const args: Record<string, unknown> = {};
+    let parseError: string | undefined;
+    // A single parameter that fails coercion rejects the whole call.
+    try {
+      const paramRegex = /<parameter=([^>\s]+)\s*>([\s\S]*?)<\/parameter>/gi;
+      let pm: RegExpExecArray | null;
+      while ((pm = paramRegex.exec(paramsBlock)) !== null) {
+        const paramName = pm[1]!.trim();
+        args[paramName] = coerceParamValue(pm[2]!, schemaProperties[paramName]);
+      }
+    } catch (err) {
+      parseError = err instanceof Error ? err.message : String(err);
+    }
+    if (parseError !== undefined) {
+      errors.push({ code: "PARSE_ERROR", message: parseError, raw: inner });
+      continue;
+    }
+
+    const validation = validateToolArguments(name, args, tools);
+    if (!validation.isValid && validation.error) {
+      errors.push({ ...validation.error, raw: inner });
+      continue;
+    }
+
+    toolCalls.push({
+      id: generateStableToolCallId(name, args),
+      name,
+      arguments: args,
+      raw: inner,
+    });
+  }
+
+  return { matched: true, toolCalls, errors };
+}
diff --git a/packages/sdk/test/unit/completion-normalizer.test.ts b/packages/sdk/test/unit/completion-normalizer.test.ts
index f92a8f9c48..f5b92f7575 100644
--- a/packages/sdk/test/unit/completion-normalizer.test.ts
+++ b/packages/sdk/test/unit/completion-normalizer.test.ts
@@ -770,3 +770,91 @@ test("harmony spec defined but hermes dialect still strips <think> as before", (
   t.alike(texts(events, "thinkingDelta"), ["thought"]);
   t.alike(texts(events, "contentDelta"), ["A", "B"]);
 });
+
+test("qwen35 streaming: tool frame emits toolCall mid-stream", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "qwen35",
+    }),
+  );
+  const text = `<tool_call><function=get_weather><parameter=city>Paris</parameter></function></tool_call>`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "qwen35 tool frame emits toolCall");
+  t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather");
+  t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Paris" });
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<tool_call>"), "open marker must not leak");
+  t.absent(contentJoined.includes("</tool_call>"), "close marker must not leak");
+});
+
+test("qwen35 streaming: marker split across pushes still detected", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "qwen35",
+    }),
+  );
+  const events = pushAll(n, [
+    "<tool_",
+    "call><function=get_weather><parameter=city>Lima</parameter></function></tool_call>",
+  ]);
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "qwen35 frame detected across split marker");
+});
+
+test("gemma4 streaming: tool frame emits toolCall mid-stream", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: TEXT_PARSE_CAPS,
+      tools: [GET_WEATHER_TOOL],
+      toolDialect: "gemma4",
+    }),
+  );
+  const text = `<|tool_call>call:get_weather{city:<|"|>Tokyo<|"|>}<tool_call|>`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  const toolEvents = events.filter((e) => e.type === "toolCall");
+  t.is(toolEvents.length, 1, "gemma4 tool frame emits toolCall");
+  t.is((toolEvents[0] as { call: { name: string } }).call.name, "get_weather");
+  t.alike((toolEvents[0] as { call: { arguments: unknown } }).call.arguments, { city: "Tokyo" });
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<|tool_call>"), "open marker must not leak");
+  t.absent(contentJoined.includes("<tool_call|>"), "close marker must not leak");
+});
+
+test("gemma4 thought frame: inner emitted as thinkingDelta (captureThinking=true)", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: NONE_CAPS,
+      toolDialect: "gemma4",
+      captureThinking: true,
+    }),
+  );
+  const text = `<|channel>thoughtthinking here<channel|>after`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  t.alike(texts(events, "thinkingDelta"), ["thinking here"]);
+  t.alike(texts(events, "contentDelta"), ["after"]);
+  t.is(n.getAccumulated().thinkingText, "thinking here");
+  t.is(n.getAccumulated().contentText, "after");
+});
+
+test("gemma4 thought frame: silently dropped (captureThinking=false)", (t) => {
+  const n = createCompletionNormalizer(
+    baseConfig({
+      capabilities: NONE_CAPS,
+      toolDialect: "gemma4",
+      captureThinking: false,
+    }),
+  );
+  const text = `<|channel>thoughtthinking here<channel|>after`;
+  const events = [...pushAll(n, [text]), ...n.finish()];
+  t.absent(types(events).includes("thinkingDelta"), "no thinkingDelta when not captured");
+  t.alike(texts(events, "contentDelta"), ["after"]);
+  const contentJoined = texts(events, "contentDelta").join("");
+  t.absent(contentJoined.includes("<|channel>thought"), "open marker must not leak");
+  t.absent(contentJoined.includes("<channel|>"), "close marker must not leak");
+  t.absent(contentJoined.includes("thinking here"), "thought inner must be dropped");
+});
diff --git a/packages/sdk/test/unit/completion-stream-schemas.test.ts b/packages/sdk/test/unit/completion-stream-schemas.test.ts
index 704b339927..37dac6aa47 100644
--- a/packages/sdk/test/unit/completion-stream-schemas.test.ts
+++ b/packages/sdk/test/unit/completion-stream-schemas.test.ts
@@ -3,6 +3,8 @@ import test from "brittle";
 import {
   completionStreamResponseSchema,
   completionStatsSchema,
+  generationParamsSchema,
+  toolDialectSchema,
 } from "@/schemas/completion-stream";
 
 test("completionStatsSchema: accepts backendDevice 'cpu' and 'gpu'", (t) => {
@@ -29,6 +31,25 @@ test("completionStatsSchema: backendDevice is optional", (t) => {
   t.is(result.success, true);
 });
 
+test("generationParamsSchema: accepts reasoning_budget -1 and 0", (t) => { // both values are expected to pass safeParse
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: -1 }).success, true);
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: 0 }).success, true);
+});
+
+test("generationParamsSchema: rejects reasoning_budget other values", (t) => { // integers adjacent to the accepted -1 and 0 are expected to fail
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: 1 }).success, false);
+  t.is(generationParamsSchema.safeParse({ reasoning_budget: -2 }).success, false);
+});
+
+test("toolDialectSchema: accepts qwen35 and gemma4", (t) => { // both dialect strings are expected to pass safeParse
+  t.is(toolDialectSchema.safeParse("qwen35").success, true);
+  t.is(toolDialectSchema.safeParse("gemma4").success, true);
+});
+
+test("toolDialectSchema: rejects unknown dialects", (t) => { // an arbitrary string is expected to fail safeParse
+  t.is(toolDialectSchema.safeParse("unknown").success, false);
+});
+
 test("completionStreamResponseSchema: round-trips backendDevice through completionStats event", (t) => {
   const result = completionStreamResponseSchema.safeParse({
     type: "completionStream",
diff --git a/packages/sdk/test/unit/llm-config-schema.test.ts b/packages/sdk/test/unit/llm-config-schema.test.ts
index 3f23eb9184..da2dd1bcb6 100644
--- a/packages/sdk/test/unit/llm-config-schema.test.ts
+++ b/packages/sdk/test/unit/llm-config-schema.test.ts
@@ -101,3 +101,17 @@ test("loadModelSrcRequestSchema: accepts split-mode for LLM", (t) => {
   });
   t.is(result.success, true);
 });
+
+test("llmConfigBaseSchema: accepts reasoning_budget -1 (unrestricted)", (t) => { // the object carries only reasoning_budget
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -1 }).success, true);
+});
+
+test("llmConfigBaseSchema: accepts reasoning_budget 0 (disabled)", (t) => { // the object carries only reasoning_budget
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0 }).success, true);
+});
+
+test("llmConfigBaseSchema: rejects reasoning_budget other values", (t) => { // anything other than -1 or 0 is expected to fail
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 1 }).success, false);
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: -2 }).success, false);
+  t.is(llmConfigBaseSchema.safeParse({ reasoning_budget: 0.5 }).success, false); // a fractional value between -1 and 0's neighbours
+});
diff --git a/packages/sdk/test/unit/llm-plugin-transform.test.ts b/packages/sdk/test/unit/llm-plugin-transform.test.ts
new file mode 100644
index 0000000000..f5b7f66ccb
--- /dev/null
+++ b/packages/sdk/test/unit/llm-plugin-transform.test.ts
@@ -0,0 +1,55 @@
+// @ts-expect-error brittle has no type declarations
+import test from "brittle";
+import { transformLlmConfig } from "@/server/bare/plugins/llamacpp-completion/transform";
+import { llmConfigSchema } from "@/schemas/llamacpp-config";
+
+function makeConfig(overrides: Record<string, unknown> = {}) { // test helper: returns llmConfigSchema.parse(overrides); overrides default to {}
+  return llmConfigSchema.parse(overrides);
+}
+
+test("transformLlmConfig: system_prompt is never forwarded to C++", (t) => { // neither spelling of the key may appear in the transform output
+  const config = makeConfig({ system_prompt: "You are a helpful assistant." });
+  const result = transformLlmConfig(config);
+  t.absent(
+    "system_prompt" in result, // underscore spelling
+    "system_prompt must not appear in C++ arg map",
+  );
+  t.absent(
+    "system-prompt" in result, // hyphenated spelling
+    "hyphenated system-prompt must not appear in C++ arg map",
+  );
+});
+
+test("transformLlmConfig: modelType is never forwarded to C++", (t) => { // checked on a config built with no overrides
+  const config = makeConfig({});
+  const result = transformLlmConfig(config);
+  t.absent("modelType" in result, "modelType must not appear in C++ arg map");
+  t.absent("model_type" in result); // the snake_case spelling is checked as well
+});
+
+test("transformLlmConfig: reasoning_budget survives as underscore key", (t) => { // the output is read back under the same underscore key
+  const config = makeConfig({ reasoning_budget: 0 });
+  const result = transformLlmConfig(config);
+  t.is(result["reasoning_budget"], "0", "reasoning_budget=0 must be forwarded as string '0'"); // expected as the string "0", not the number 0
+});
+
+test("transformLlmConfig: reasoning_budget=-1 survives", (t) => { // companion to the reasoning_budget=0 case
+  const config = makeConfig({ reasoning_budget: -1 });
+  const result = transformLlmConfig(config);
+  t.is(result["reasoning_budget"], "-1"); // expected as the string "-1"
+});
+
+test("transformLlmConfig: stop_sequences is renamed to reverse_prompt", (t) => { // the old key must be gone and the new key must carry the values
+  const config = makeConfig({ stop_sequences: ["</s>", "<|im_end|>"] });
+  const result = transformLlmConfig(config);
+  t.absent("stop_sequences" in result);
+  t.is(result["reverse_prompt"], "</s>, <|im_end|>"); // expected as one string with the sequences separated by ", "
+});
+
+test("transformLlmConfig: numeric fields are stringified", (t) => { // covers two integer fields and one fractional field
+  const config = makeConfig({ ctx_size: 4096, gpu_layers: 99, temp: 0.7 });
+  const result = transformLlmConfig(config);
+  t.is(result["ctx_size"], "4096");
+  t.is(result["gpu_layers"], "99");
+  t.is(result["temp"], "0.7"); // the fractional value is expected in its plain decimal form
+});
diff --git a/packages/sdk/test/unit/tool-parser.test.ts b/packages/sdk/test/unit/tool-parser.test.ts
index 2526bdaae8..1c03a54f2a 100644
--- a/packages/sdk/test/unit/tool-parser.test.ts
+++ b/packages/sdk/test/unit/tool-parser.test.ts
@@ -6,6 +6,8 @@ import {
   detectToolDialectFromName,
 } from "@/server/utils/tools";
 import { parseHarmonyFormat } from "@/server/utils/tools/parsers/harmony";
+import { parseQwen35Format } from "@/server/utils/tools/parsers/qwen35";
+import { parseGemma4NativeFormat } from "@/server/utils/tools/parsers/gemma4native";
 const weatherTool: Tool = {
   type: "function",
   name: "weather",
@@ -268,6 +270,18 @@ test("detectToolDialectFromName: non-LFM models default to hermes", (t) => {
     [undefined, "/cache/abc_Llama-3.3-70B-Instruct-Tool-Calling.gguf"],
     [undefined, ""],
     ["", ""],
+    // Gemma 3 models (including 4B size variant) must not be detected as Gemma 4
+    [undefined, "/cache/abc_gemma3-Q4_K_M.gguf"],
+    ["GEMMA3_Q4", "/Users/x/.qvac/models/abc_gemma-3-4b-q4_k_m.gguf"],
+    // Qwen3 5B (5 billion params) must not be mistaken for Qwen3.5 (model version 3.5)
+    [undefined, "/cache/abc_Qwen3-5B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_5B_INST", "/Users/x/.qvac/models/abc_qwen3-5b-instruct.gguf"],
+    [undefined, "/cache/abc_Qwen3-50B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_50B_INST", "/Users/x/.qvac/models/abc_qwen3-50b-instruct.gguf"],
+    [undefined, "/cache/abc_Qwen3-60B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_60B_INST", "/Users/x/.qvac/models/abc_qwen3-60b-instruct.gguf"],
+    // gemma-40b contains 'gemma-4' as a substring but the trailing '0' (digit) blocks the gemma4 lookahead
+    [undefined, "/cache/abc_gemma-40b-Q4_K_M.gguf"],
   ];
 
   for (const [name, path] of cases) {
@@ -601,3 +615,493 @@ test("detectToolDialectFromName: GPT-OSS variants → harmony", (t) => {
     t.is(detectToolDialectFromName(name, path), "harmony", `name=${name} path=${path}`);
   }
 });
+
+test("detectToolDialectFromName: Qwen3.5 variants → qwen35", (t) => { // every [name, path] pair below is expected to resolve to "qwen35"
+  const cases: Array<[string | undefined, string]> = [ // tuples are [name, path]; name may be undefined
+    [undefined, "/cache/abc_Qwen3.5-7B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_5_7B_INST_Q4", "/Users/x/.qvac/models/abc_qwen3.5-7b-instruct.gguf"],
+    [undefined, "/cache/abc_qwen3-5-7b.gguf"],
+    // Qwen3.6 shares the same Pythonic-XML tool-call format as Qwen3.5
+    [undefined, "/cache/abc_Qwen3.6-7B-Instruct-Q4_K_M.gguf"],
+    ["QWEN3_6_7B_INST", "/Users/x/.qvac/models/abc_qwen3.6-7b-instruct.gguf"],
+  ];
+
+  for (const [name, path] of cases) {
+    t.is(detectToolDialectFromName(name, path), "qwen35", `name=${name} path=${path}`); // the message identifies the failing pair
+  }
+});
+
+test("detectToolDialectFromName: Gemma 4 variants → gemma4", (t) => { // every [name, path] pair below is expected to resolve to "gemma4"
+  const cases: Array<[string | undefined, string]> = [ // covers both the "gemma4" and "gemma-4" spellings in the path
+    [undefined, "/cache/abc_gemma4-9b-it-Q4_K_M.gguf"],
+    ["GEMMA4_27B_IT_Q4", "/Users/x/.qvac/models/abc_gemma-4-27b-it.gguf"],
+    [undefined, "/cache/abc_gemma4-27b.gguf"],
+  ];
+
+  for (const [name, path] of cases) {
+    t.is(detectToolDialectFromName(name, path), "gemma4", `name=${name} path=${path}`); // the message identifies the failing pair
+  }
+});
+
+test("parseQwen35Format: single function call with parameters", (t) => { // multi-line <tool_call> block with two <parameter=…> entries
+  const text = `<tool_call>
+<function=get_weather>
+<parameter=city>Paris</parameter>
+<parameter=unit>celsius</parameter>
+</function>
+</tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools); // pythonicTools is defined outside this hunk
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.alike(result.toolCalls[0]?.arguments, { city: "Paris", unit: "celsius" }); // each parameter name becomes an argument key
+  t.is(result.errors.length, 0);
+});
+
+test("parseQwen35Format: no tool_call markers → matched=false", (t) => { // plain prose input with no <tool_call> tags
+  const result = parseQwen35Format("No tool call here.", pythonicTools);
+  t.is(result.matched, false);
+  t.is(result.toolCalls.length, 0);
+});
+
+test("parseQwen35Format: missing function tag → PARSE_ERROR", (t) => { // <tool_call> wrapper is present but holds no <function=…> tag
+  const text = `<tool_call>some plain content</tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true); // the wrapper alone is expected to count as a match
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseToolCalls(dialect=qwen35): parses Qwen3.5 XML format", (t) => { // goes through parseToolCalls with the dialect passed explicitly
+  const text = `<tool_call><function=get_weather><parameter=city>Tokyo</parameter></function></tool_call>`; // single-line form of the XML format
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35");
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Tokyo" });
+});
+
+test("parseGemma4NativeFormat: single call with string values", (t) => { // one call:name{…} frame between <|tool_call> and <tool_call|>
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,country:<|"|>FR<|"|>}<tool_call|>`; // string values are wrapped in <|"|> delimiters
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get_weather");
+  t.alike(result.toolCalls[0]?.arguments, { city: "Paris", country: "FR" }); // delimiters must not appear in the parsed values
+  t.is(result.errors.length, 0);
+});
+
+test("parseGemma4NativeFormat: no open marker → matched=false", (t) => { // plain prose input with no <|tool_call> marker
+  const result = parseGemma4NativeFormat("No gemma call here.", pythonicTools);
+  t.is(result.matched, false);
+  t.is(result.toolCalls.length, 0);
+});
+
+test("parseGemma4NativeFormat: multiline string value is parsed correctly", (t) => { // string value containing a line break
+  const text = `<|tool_call>call:get_weather{city:<|"|>line1\nline2<|"|>}<tool_call|>`; // \n in the template literal is a real newline character in the input
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.arguments?.city, "line1\nline2"); // the newline is expected to be preserved in the value
+});
+
+test("parseToolCalls(dialect=gemma4): parses Gemma4 native format", (t) => { // goes through parseToolCalls with the dialect passed explicitly
+  const text = `<|tool_call>call:get_weather{city:<|"|>Berlin<|"|>}<tool_call|>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "gemma4");
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Berlin" });
+});
+
+// --- qwen35 coercion and error-surface tests ---
+
+test("parseQwen35Format: integer param is coerced to number", (t) => { // uses a local tool so the parameter types are visible in the test
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, label: { type: "string" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>42</parameter><parameter=label>hello</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.arguments?.count, 42); // expected as the number 42, not the text "42"
+  t.is(result.toolCalls[0]?.arguments?.label, "hello"); // the string-typed param stays a string
+});
+
+test("parseQwen35Format: boolean param 'true' coerces to true", (t) => { // lowercase "true" text for a boolean-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>true</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.flag, true); // expected as the boolean true, not the text "true"
+});
+
+test("parseQwen35Format: boolean param 'false' coerces to false", (t) => { // lowercase "false" text for a boolean-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>false</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.flag, false); // expected as the boolean false, not the text "false"
+});
+
+test("parseQwen35Format: boolean param 'True' (uppercase) surfaces PARSE_ERROR", (t) => { // capitalised "True" is expected to be rejected, not coerced
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, flag: { type: "boolean" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=flag>True</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // the whole call is expected to be dropped, even though count is valid
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: integer param 'not-a-number' surfaces PARSE_ERROR", (t) => { // non-numeric text for an integer-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>not-a-number</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // no call is expected to be produced
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: integer param '1.5' (non-integer) surfaces PARSE_ERROR", (t) => { // numeric but fractional text for an integer-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1.5</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // no call is expected to be produced
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: malformed array param surfaces PARSE_ERROR (no raw-string fallback)", (t) => { // unterminated array text for an array-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, tags: { type: "array" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=tags>[1,2</parameter></function></tool_call>`; // "[1,2" lacks the closing bracket
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // the call must not be emitted with the raw text as the value
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: malformed object param surfaces PARSE_ERROR (no raw-string fallback)", (t) => { // invalid JSON text for an object-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, meta: { type: "object" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=meta>{bad json</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // the call must not be emitted with the raw text as the value
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: array param is parsed from JSON", (t) => { // well-formed JSON array text for an array-typed param
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" }, tags: { type: "array" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count>1</parameter><parameter=tags>["a","b","c"]</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.tags, ["a", "b", "c"]); // expected as a real array, not the JSON text
+});
+
+test("parseQwen35Format: multiple tool calls are all parsed", (t) => { // two <tool_call> blocks separated by a newline
+  const text = `<tool_call><function=get_weather><parameter=city>Paris</parameter></function></tool_call>
+<tool_call><function=get_horoscope><parameter=sign>Aries</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 2);
+  t.is(result.toolCalls[0]?.name, "get_weather"); // calls are expected in the order they appear in the text
+  t.is(result.toolCalls[1]?.name, "get_horoscope");
+});
+
+test("parseQwen35Format: unknown tool name surfaces UNKNOWN_TOOL", (t) => { // well-formed call whose function name is expected to be absent from the tool list
+  const text = `<tool_call><function=unknown_fn><parameter=x>1</parameter></function></tool_call>`;
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "UNKNOWN_TOOL"); // a distinct code from PARSE_ERROR
+});
+
+test("parseQwen35Format: missing required param surfaces VALIDATION_ERROR", (t) => { // only "country" is supplied for get_weather
+  const text = `<tool_call><function=get_weather><parameter=country>FR</parameter></function></tool_call>`; // NOTE(review): presumably "city" is the required param in pythonicTools (defined outside this hunk); confirm
+  const result = parseQwen35Format(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "VALIDATION_ERROR"); // a distinct code from PARSE_ERROR
+});
+
+test("parseToolCalls(dialect=qwen35): JSON inside tool_call falls through to hermes parser", (t) => { // body is a JSON object rather than <function=…> XML
+  const text = `<tool_call>
+{"name": "get_weather", "arguments": {"city": "Seoul"}}
+</tool_call>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools, "qwen35"); // the dialect is still passed as "qwen35"
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Seoul" });
+});
+
+// --- gemma4 structural and error-surface tests (late qwen35 and default-dialect cases follow at the end) ---
+
+test("parseGemma4NativeFormat: bare numeric arg is parsed as number", (t) => { // value written without <|"|> string delimiters
+  const numTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: { type: "object", properties: { count: { type: "integer" } }, required: ["count"] },
+  };
+  const text = `<|tool_call>call:typed{count:7}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [numTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.count, 7); // expected as the number 7
+});
+
+test("parseGemma4NativeFormat: bare boolean arg is parsed as boolean", (t) => { // value written without <|"|> string delimiters
+  const boolTool: Tool = {
+    type: "function",
+    name: "toggle",
+    description: "toggle",
+    parameters: { type: "object", properties: { enabled: { type: "boolean" } }, required: ["enabled"] },
+  };
+  const text = `<|tool_call>call:toggle{enabled:true}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [boolTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.enabled, true); // expected as the boolean true
+});
+
+test("parseGemma4NativeFormat: nested object arg is parsed correctly", (t) => { // an argument whose value is itself a {key:value,…} group
+  const searchTool: Tool = {
+    type: "function",
+    name: "search",
+    description: "search",
+    parameters: {
+      type: "object",
+      properties: { query: { type: "string" }, filters: { type: "object" } },
+      required: ["query"],
+    },
+  };
+  const text = `<|tool_call>call:search{query:<|"|>test<|"|>,filters:{active:true,limit:10}}<tool_call|>`; // nested keys are unquoted and the values are bare
+  const result = parseGemma4NativeFormat(text, [searchTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.filters, { active: true, limit: 10 }); // nested values are expected as boolean and number
+  t.is(result.toolCalls[0]?.arguments?.query, "test");
+});
+
+test("parseGemma4NativeFormat: nested array arg is parsed correctly", (t) => { // an argument whose value is a [..] list
+  const arrayTool: Tool = {
+    type: "function",
+    name: "get_weather",
+    description: "weather",
+    parameters: {
+      type: "object",
+      properties: { city: { type: "string" }, ids: { type: "array" } },
+      required: ["city"],
+    },
+  };
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,ids:[1,2,3]}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [arrayTool]); // uses the local tool, not pythonicTools
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.alike(result.toolCalls[0]?.arguments?.ids, [1, 2, 3]); // elements are expected as numbers
+});
+
+test("parseGemma4NativeFormat: tab char in string value round-trips correctly", (t) => { // string value containing a tab
+  const text = `<|tool_call>call:get_weather{city:<|"|>col1\tcol2<|"|>}<tool_call|>`; // \t in the template literal is a real tab character in the input
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.city, "col1\tcol2"); // the tab is expected to be preserved in the value
+});
+
+test("parseGemma4NativeFormat: CR char in string value round-trips correctly", (t) => { // string value containing a carriage return
+  const text = `<|tool_call>call:get_weather{city:<|"|>line1\rline2<|"|>}<tool_call|>`; // \r in the template literal is a real CR character in the input
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls[0]?.arguments?.city, "line1\rline2"); // the CR is expected to be preserved in the value
+});
+
+test("parseGemma4NativeFormat: multiple tool calls are all parsed", (t) => { // two call frames separated by a newline
+  const text = `<|tool_call>call:get_weather{city:<|"|>London<|"|>}<tool_call|>
+<|tool_call>call:get_horoscope{sign:<|"|>Leo<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 2);
+  t.is(result.toolCalls[0]?.name, "get_weather"); // calls are expected in the order they appear in the text
+  t.is(result.toolCalls[1]?.name, "get_horoscope");
+});
+
+test("parseGemma4NativeFormat: unknown tool name surfaces UNKNOWN_TOOL", (t) => { // well-formed call whose name is expected to be absent from the tool list
+  const text = `<|tool_call>call:unknown_fn{x:<|"|>y<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "UNKNOWN_TOOL"); // a distinct code from PARSE_ERROR
+});
+
+test("parseGemma4NativeFormat: malformed args (trailing comma) surface PARSE_ERROR", (t) => { // known tool name, but the argument list is malformed
+  const text = `<|tool_call>call:get_weather{city:<|"|>Paris<|"|>,}<tool_call|>`; // note the "," directly before the closing "}"
+  const result = parseGemma4NativeFormat(text, pythonicTools);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0); // the call is expected to be dropped rather than emitted with partial args
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: empty integer param surfaces PARSE_ERROR (not 0)", (t) => { // empty text must not be silently read as 0
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { count: { type: "integer" } },
+      required: ["count"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=count></parameter></function></tool_call>`; // the parameter tag has no content at all
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseQwen35Format: whitespace-only number param surfaces PARSE_ERROR (not 0)", (t) => { // blank text must not be silently read as 0
+  const typedTool: Tool = {
+    type: "function",
+    name: "typed",
+    description: "typed",
+    parameters: {
+      type: "object",
+      properties: { score: { type: "number" } },
+      required: ["score"],
+    },
+  };
+  const text = `<tool_call><function=typed><parameter=score>   </parameter></function></tool_call>`; // the parameter content is three spaces
+  const result = parseQwen35Format(text, [typedTool]);
+  t.is(result.matched, true);
+  t.is(result.toolCalls.length, 0);
+  t.is(result.errors.length, 1);
+  t.is(result.errors[0]?.code, "PARSE_ERROR");
+});
+
+test("parseGemma4NativeFormat: hyphenated tool name parses correctly", (t) => { // the tool name contains "-" rather than "_"
+  const hyphenTool: Tool = {
+    type: "function",
+    name: "get-weather",
+    description: "Get current weather",
+    parameters: {
+      type: "object",
+      properties: { city: { type: "string" } },
+      required: ["city"],
+    },
+  };
+  const text = `<|tool_call>call:get-weather{city:<|"|>Tokyo<|"|>}<tool_call|>`;
+  const result = parseGemma4NativeFormat(text, [hyphenTool]);
+  t.is(result.matched, true);
+  t.is(result.errors.length, 0);
+  t.is(result.toolCalls.length, 1);
+  t.is(result.toolCalls[0]?.name, "get-weather"); // the full hyphenated name is expected, not a truncated prefix
+  t.alike(result.toolCalls[0]?.arguments, { city: "Tokyo" });
+});
+
+test("parseToolCalls(default): Qwen3.5 XML format is recovered without explicit dialect", (t) => { // same XML format, but no dialect is supplied
+  const text = `<tool_call><function=get_weather><parameter=city>Berlin</parameter></function></tool_call>`;
+  const { toolCalls, errors } = parseToolCalls(text, pythonicTools); // third (dialect) argument deliberately omitted
+  t.is(errors.length, 0);
+  t.is(toolCalls.length, 1);
+  t.is(toolCalls[0]?.name, "get_weather");
+  t.alike(toolCalls[0]?.arguments, { city: "Berlin" });
+});
diff --git a/packages/sdk/tests-qvac/tests/completion-tests.ts b/packages/sdk/tests-qvac/tests/completion-tests.ts
index d64d44040a..d7a0c0e540 100644
--- a/packages/sdk/tests-qvac/tests/completion-tests.ts
+++ b/packages/sdk/tests-qvac/tests/completion-tests.ts
@@ -607,6 +607,28 @@ export const completionResponseFormatJsonSchema: TestDefinition = {
   metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 20000 },
 };
 
+export const completionReasoningBudgetDisabled: TestDefinition = { // non-streaming completion run with reasoning_budget: 0
+  testId: "completion-reasoning-budget-disabled",
+  params: {
+    history: [{ role: "user", content: "What is 2+2? Answer with only the number." }],
+    stream: false,
+    generationParams: { reasoning_budget: 0, predict: 32 },
+  },
+  expectation: { validation: "type", expectedType: "string" }, // the expectation is a type check (string), not a content check
+  metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 },
+};
+
+export const completionReasoningBudgetUnrestricted: TestDefinition = { // non-streaming completion run with reasoning_budget: -1
+  testId: "completion-reasoning-budget-unrestricted",
+  params: {
+    history: [{ role: "user", content: "What is 2+2? Answer with only the number." }],
+    stream: false,
+    generationParams: { reasoning_budget: -1, predict: 32 },
+  },
+  expectation: { validation: "type", expectedType: "string" }, // the expectation is a type check (string), not a content check
+  metadata: { category: "completion", dependency: "llm", estimatedDurationMs: 10000 },
+};
+
 export const completionResponseFormatWithToolsRejected: TestDefinition = {
   testId: "completion-response-format-with-tools-rejected",
   params: {
@@ -677,4 +699,6 @@ export const completionTests = [
   completionResponseFormatJsonObjectStreaming,
   completionResponseFormatJsonSchema,
   completionResponseFormatWithToolsRejected,
+  completionReasoningBudgetDisabled,
+  completionReasoningBudgetUnrestricted,
 ];
diff --git a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
index 9457fcaf8f..6204b35a94 100644
--- a/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
+++ b/packages/sdk/tests-qvac/tests/shared/executors/completion-executor.ts
@@ -29,6 +29,7 @@ interface GenerationParams {
   frequency_penalty?: number;
   presence_penalty?: number;
   repeat_penalty?: number;
+  reasoning_budget?: -1 | 0;
 }
 
 interface CompletionTestParams {
diff --git a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
index be55a9a0ef..35cf887a1f 100644
--- a/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
+++ b/packages/sdk/tests-qvac/tests/shared/executors/tools-executor.ts
@@ -1,4 +1,5 @@
 import { completion } from "@qvac/sdk";
+import type { ToolDialect } from "@qvac/sdk";
 import {
   ValidationHelpers,
   type TestResult,
@@ -24,9 +25,11 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
         parameters: Record<string, unknown>;
       }>;
       toolsMode?: "static" | "dynamic";
+      toolDialect?: ToolDialect;
+      resourceKey?: string;
       stream?: boolean;
     };
-    const resourceKey = p.toolsMode === "dynamic" ? "tools-dynamic" : "tools";
+    const resourceKey = p.resourceKey ?? (p.toolsMode === "dynamic" ? "tools-dynamic" : "tools");
     const toolsModelId = await this.resources.ensureLoaded(resourceKey);
 
     try {
@@ -35,6 +38,7 @@ export class ToolsExecutor extends AbstractModelExecutor<typeof toolsTests> {
         history: p.history,
         tools: p.tools as never,
         stream: p.stream ?? false,
+        ...(p.toolDialect && { toolDialect: p.toolDialect }),
       });
 
       const text = await result.text;
diff --git a/packages/sdk/tests-qvac/tests/tools-tests.ts b/packages/sdk/tests-qvac/tests/tools-tests.ts
index 271c3d62a5..72bac4b335 100644
--- a/packages/sdk/tests-qvac/tests/tools-tests.ts
+++ b/packages/sdk/tests-qvac/tests/tools-tests.ts
@@ -1,5 +1,6 @@
 // Tools/Function calling test definitions
 import type { TestDefinition } from "@tetherto/qvac-test-suite";
+import type { ToolDialect } from "@qvac/sdk";
 
 // Helper for creating tools tests
 const createToolsTest = (
@@ -21,6 +22,8 @@ const createToolsTest = (
       expectedType: "string" | "number" | "array";
     };
     toolsMode?: "static" | "dynamic";
+    toolDialect?: ToolDialect;
+    resourceKey?: string;
     suites?: string[];
   } = {},
 ): TestDefinition => {
@@ -29,7 +32,7 @@ const createToolsTest = (
     expectedType: "string" as const,
   };
   const dependency =
-    options.toolsMode === "dynamic" ? "tools-dynamic" : "tools";
+    options.resourceKey ?? (options.toolsMode === "dynamic" ? "tools-dynamic" : "tools");
   return {
     testId,
     params: {
@@ -37,6 +40,8 @@ const createToolsTest = (
       tools,
       stream: false,
       ...(options.toolsMode && { toolsMode: options.toolsMode }),
+      ...(options.toolDialect && { toolDialect: options.toolDialect }),
+      ...(options.resourceKey && { resourceKey: options.resourceKey }),
     },
     expectation,
     ...(options.suites && { suites: options.suites }),