From 7d3ef347cd35342bd405be33bfe8ce59ee101b4a Mon Sep 17 00:00:00 2001 From: Yangmin Li Date: Fri, 24 Apr 2026 17:53:00 -0700 Subject: [PATCH] fix(sa-bench): auto-fallback when tokenizer has no chat template Models like DeepSeek-V4 ship no Hugging Face chat template; rendering lives entirely inside the engine. With the default `use_chat_template: true` (introduced in #20) and no `custom_tokenizer` plugin, sa-bench called `tokenizer.apply_chat_template(...)` directly and crashed with `ValueError: ... has no chat template`. Detect this case in `main()` after `get_tokenizer` returns: if `use_chat_template` is on but the tokenizer exposes neither `chat_template` nor `default_chat_template`, emit a loud warning and fall back to the raw-text path so the run completes. Users who care about exact token-count parity with the server are pointed at `custom_tokenizer` (e.g. SGLangDeepseekV4Tokenizer added in #73). Recipes that already set `custom_tokenizer` are unaffected. --- .../scripts/sa-bench/benchmark_serving.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py b/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py index ea6bae93..9fdf1659 100644 --- a/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py +++ b/src/srtctl/benchmarks/scripts/sa-bench/benchmark_serving.py @@ -840,6 +840,29 @@ def main(args: argparse.Namespace): custom_tokenizer=args.custom_tokenizer, ) + # Some models (e.g. DeepSeek-V4) ship NO Hugging Face chat template; the + # server-side rendering happens entirely inside the engine. If a user runs + # such a model without supplying a `custom_tokenizer` plugin, the default + # `use_chat_template=True` would cause `tokenizer.apply_chat_template(...)` + # to raise. Fall back to raw-text mode and warn loudly so the run completes + # instead of crashing.
+ if args.use_chat_template and not args.custom_tokenizer: + has_template = bool(getattr(tokenizer, "chat_template", None)) or bool( + getattr(tokenizer, "default_chat_template", None) + ) + if not has_template: + warnings.warn( + f"Tokenizer for '{tokenizer_id}' has no chat_template and no " + "`custom_tokenizer` was provided; disabling --use-chat-template " + "and benchmarking against the raw text path. Token counts on the " + "client may diverge from the server's #new-token. To match the " + "server exactly, set `custom_tokenizer` in the recipe (e.g. " + "`sa_bench_tokenizers.sglang_deepseek_v4.SGLangDeepseekV4Tokenizer` " + "for DeepSeek-V4).", + stacklevel=2, + ) + args.use_chat_template = False + if args.dataset_name == "custom": from benchmark_dataset import sample_custom_requests