From 810081048f49513d51fdc76f5e1c40c492656255 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 25 Mar 2026 19:05:20 +0100 Subject: [PATCH 1/2] grammar: increase MAX_REPETITION_THRESHOLD + make it configurable via envvar --- src/llama-grammar.cpp | 18 +++++++++++++++--- src/llama-grammar.h | 5 ++++- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/llama-grammar.cpp b/src/llama-grammar.cpp index badcbfd0fbb6..9988b650b870 100644 --- a/src/llama-grammar.cpp +++ b/src/llama-grammar.cpp @@ -7,10 +7,22 @@ #include #include #include +#include #include #include -#define MAX_REPETITION_THRESHOLD 2000 +static constexpr uint64_t DEFAULT_MAX_REPETITION_THRESHOLD = 50000; + +llama_grammar_parser::llama_grammar_parser(const struct llama_vocab * vocab) + : vocab(vocab) { + const char * env = std::getenv("LLAMA_GRAMMAR_MAX_REPS"); + if (env) { + max_repetition_threshold = std::stoull(env); + } else { + max_repetition_threshold = DEFAULT_MAX_REPETITION_THRESHOLD; + } +} + // // helpers // @@ -491,7 +503,7 @@ const char * llama_grammar_parser::parse_sequence( total_rules = min_times; } - if (n_prev_rules * total_rules >= MAX_REPETITION_THRESHOLD) { + if (n_prev_rules * total_rules >= max_repetition_threshold) { throw std::runtime_error("number of rules that are going to be repeated multiplied by the new repetition exceeds sane defaults, please reduce the number of repetitions or rule complexity"); } @@ -649,7 +661,7 @@ const char * llama_grammar_parser::parse_sequence( throw std::runtime_error(std::string("expecting ',' at ") + pos); } bool has_max = max_times != UINT64_MAX; - if (min_times > MAX_REPETITION_THRESHOLD || (has_max && max_times > MAX_REPETITION_THRESHOLD)) { + if (min_times > max_repetition_threshold || (has_max && max_times > max_repetition_threshold)) { throw std::runtime_error(std::string("number of repetitions exceeds sane defaults, please reduce the number of repetitions")); } handle_repetitions(min_times, max_times); diff --git a/src/llama-grammar.h b/src/llama-grammar.h index b5a0e588e903..558351874e5a 100644 --- a/src/llama-grammar.h +++ b/src/llama-grammar.h @@ -89,7 +89,10 @@ struct llama_grammar_parser { llama_grammar_rules rules; - llama_grammar_parser(const struct llama_vocab * vocab = nullptr) : vocab(vocab) {} + uint64_t max_repetition_threshold; + + llama_grammar_parser(const struct llama_vocab * vocab = nullptr); + llama_grammar_stack c_rules() const; From ae274f19da0bb3a812ae768c608cb25c77f3df46 Mon Sep 17 00:00:00 2001 From: Piotr Wilkin Date: Wed, 25 Mar 2026 19:37:29 +0100 Subject: [PATCH 2/2] make repetition test more evil --- tests/test-grammar-parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-grammar-parser.cpp b/tests/test-grammar-parser.cpp index 6abc43461be3..ed2b89daca84 100644 --- a/tests/test-grammar-parser.cpp +++ b/tests/test-grammar-parser.cpp @@ -146,7 +146,7 @@ int main() )"""); verify_failure(R"""( - root ::= (((((([^x]*){0,99}){0,99}){0,99}){0,99}){0,99}){0,99} + root ::= (((((([^x]*){0,999}){0,999}){0,999}){0,999}){0,999}){0,999} )"""); verify_failure(R"""(