From 7d3ef347cd35342bd405be33bfe8ce59ee101b4a Mon Sep 17 00:00:00 2001
From: Yangmin Li <yangminl@nvidia.com>
Date: Fri, 24 Apr 2026 17:53:00 -0700
Subject: [PATCH] fix(sa-bench): auto-fallback when tokenizer has no chat
 template

Models like DeepSeek-V4 ship no Hugging Face chat template; rendering
lives entirely inside the engine. With the default `use_chat_template:
true` (introduced in #20) and no `custom_tokenizer` plugin, sa-bench
called `tokenizer.apply_chat_template(...)` directly and crashed with
`ValueError: ... has no chat template`.

Detect this case in `main()` after `get_tokenizer` returns: if
`use_chat_template` is on, no `custom_tokenizer` is set, and the
tokenizer exposes neither `chat_template` nor `default_chat_template`,
emit a loud warning and fall back to the raw-text path so the run
completes. Users who care about exact token-count parity with the
server are pointed at `custom_tokenizer` (e.g.
SGLangDeepseekV4Tokenizer added in #73).

Recipes that already set `custom_tokenizer` are unaffected.
---
 .../scripts/sa-bench/benchmark_serving.py     | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py b/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py
index ea6bae93..9fdf1659 100644
--- a/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py
+++ b/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py
@@ -840,6 +840,29 @@ def main(args: argparse.Namespace):
         custom_tokenizer=args.custom_tokenizer,
     )
 
+    # Some models (e.g. DeepSeek-V4) ship NO Hugging Face chat template; the
+    # server-side rendering happens entirely inside the engine. If a user runs
+    # such a model without supplying a `custom_tokenizer` plugin, the default
+    # `use_chat_template=True` would cause `tokenizer.apply_chat_template(...)`
+    # to raise. Fall back to raw-text mode instead of crashing, and warn loudly
+    # so the change in behavior is not silent.
+    if args.use_chat_template and not args.custom_tokenizer:
+        has_template = bool(getattr(tokenizer, "chat_template", None)) or bool(
+            getattr(tokenizer, "default_chat_template", None)
+        )
+        if not has_template:
+            warnings.warn(
+                f"Tokenizer for '{tokenizer_id}' has no chat_template and no "
+                "`custom_tokenizer` was provided; disabling --use-chat-template "
+                "and benchmarking against the raw text path. Token counts on the "
+                "client may diverge from the server's #new-token. To match the "
+                "server exactly, set `custom_tokenizer` in the recipe (e.g. "
+                "`sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer` "
+                "for DeepSeek-V4).",
+                stacklevel=2,
+            )
+            args.use_chat_template = False
+
     if args.dataset_name == "custom":
         from benchmark_dataset import sample_custom_requests