+
+
+ Draft Tokens
+
+
+ Max MTP draft tokens per step
+ (--spec-draft-n-max). Lower = less wasted
+ draft decode; higher = bigger speedup when
+ acceptance stays high. Default: 2 on GPU,
+ 3 on CPU/Mac.
+
+
+
{
+ const raw = e.target.value;
+ if (raw === "") {
+ setSpecDraftNMax(null);
+ return;
+ }
+ const parsed = Number.parseInt(raw, 10);
+ if (Number.isFinite(parsed)) {
+ const clamped = Math.max(1, Math.min(16, parsed));
+ setSpecDraftNMax(clamped);
+ }
+ }}
+ data-test-id="spec-draft-n-max-input"
+ aria-label="Speculative decoding draft tokens"
+ className="h-7 w-[72px] rounded-[10px] border-transparent bg-black/[0.04] dark:bg-white/[0.05] hover:bg-black/[0.06] dark:hover:bg-white/[0.07] px-2 py-0 text-[13px] font-medium text-nav-fg outline-none focus-visible:ring-0"
+ />
+
+ )}
>
)}
{!isGguf && params.checkpoint && (
@@ -1051,6 +1119,7 @@ export function ChatSettingsPanel({
setCustomContextLength(null);
setKvCacheDtype(loadedKvCacheDtype);
setSpeculativeType(loadedSpeculativeType);
+ setSpecDraftNMax(loadedSpecDraftNMax);
setChatTemplateOverride(loadedChatTemplateOverride);
}}
className="h-7 px-3 text-[12px] font-medium tracking-nav text-muted-foreground"
diff --git a/studio/frontend/src/features/chat/hooks/use-chat-model-runtime.ts b/studio/frontend/src/features/chat/hooks/use-chat-model-runtime.ts
index 03f80c19f2..3f1060edf7 100644
--- a/studio/frontend/src/features/chat/hooks/use-chat-model-runtime.ts
+++ b/studio/frontend/src/features/chat/hooks/use-chat-model-runtime.ts
@@ -141,10 +141,30 @@ function getTrustRemoteCodeRequiredMessage(modelName: string): string {
return `${modelName} needs custom code enabled to load. Turn on "Enable custom code" in Chat Settings, then try again.`;
}
+// Canonicalises any value the backend reports (or persisted state holds)
+// for the Speculative Decoding dropdown. Returns "auto" / "mtp" / "ngram" /
+// "mtp+ngram" / "ngram-simple" / "off", or null for null/undefined/blank input.
+// Mirrors backend _canonicalize_spec_mode so old persisted "default" /
+// "draft-mtp" / "ngram-mod" / chain values round-trip cleanly.
function normalizeSpeculativeType(v: string | null | undefined): string | null {
if (v == null) return null;
- if (v === "default" || v === "off") return v;
- return "default";
+ const s = String(v).trim().toLowerCase();
+ if (!s) return null;
+ if (s === "auto" || s === "default") return "auto";
+ if (s === "off") return "off";
+ if (s === "ngram-simple") return "ngram-simple";
+ if (s === "mtp" || s === "draft-mtp") return "mtp";
+ if (s === "ngram" || s === "ngram-mod") return "ngram";
+ if (s === "mtp+ngram") return "mtp+ngram";
+ // Comma-chained legacy values (e.g. older persisted state): keep only which of mtp / ngram appear.
+ const parts = s.split(",").map((p) => p.trim()).filter(Boolean);
+ const hasMtp = parts.some((p) => p === "mtp" || p === "draft-mtp");
+ const hasNgram = parts.some((p) => p === "ngram" || p === "ngram-mod");
+ if (hasMtp && hasNgram) return "mtp+ngram";
+ if (hasMtp) return "mtp";
+ if (hasNgram) return "ngram";
+ // Unknown -> safe fallback to Auto so the dropdown stays controlled.
+ return "auto";
}
type LocalReasoningEffort = Extract