diff --git a/docs/advanced_features/server_arguments.md b/docs/advanced_features/server_arguments.md index 17935a400d87..e36b49a54809 100644 --- a/docs/advanced_features/server_arguments.md +++ b/docs/advanced_features/server_arguments.md @@ -295,12 +295,10 @@ Please consult the documentation below and [server_args.py](https://github.com/s ## Ngram speculative decoding | Argument | Description | Defaults | Options | | --- | --- | --- | --- | -| `--speculative-ngram-min-match-window-size` | The minimum window size for pattern matching in ngram speculative decoding. | `1` | Type: int | -| `--speculative-ngram-max-match-window-size` | The maximum window size for pattern matching in ngram speculative decoding. | `12` | Type: int | | `--speculative-ngram-min-bfs-breadth` | The minimum breadth for BFS (Breadth-First Search) in ngram speculative decoding. | `1` | Type: int | | `--speculative-ngram-max-bfs-breadth` | The maximum breadth for BFS (Breadth-First Search) in ngram speculative decoding. | `10` | Type: int | | `--speculative-ngram-match-type` | Ngram tree-building mode. `BFS` selects recency-based expansion and `PROB` selects frequency-based expansion. This setting is forwarded to the ngram cache implementation. | `BFS` | `BFS`, `PROB` | -| `--speculative-ngram-max-trie-depth` | The max trie depth for ngram speculative decoding. | `18` | Type: int | +| `--speculative-ngram-max-trie-depth` | Maximum suffix length stored and matched by the ngram trie. | `18` | Type: int | | `--speculative-ngram-capacity` | The cache capacity for ngram speculative decoding. 
| `10000000` | Type: int | ## Multi-layer Eagle speculative decoding diff --git a/docs/advanced_features/speculative_decoding.md b/docs/advanced_features/speculative_decoding.md index c573af0724a8..b8fe2d890cc4 100644 --- a/docs/advanced_features/speculative_decoding.md +++ b/docs/advanced_features/speculative_decoding.md @@ -387,13 +387,11 @@ Enable it with: | Parameter | Description | Default | |---|---|---| -| `--speculative-num-draft-tokens` | Number of draft tokens verified per step. If omitted, defaults to `--speculative-ngram-max-match-window-size`. | `12` (with default ngram settings) | -| `--speculative-ngram-min-match-window-size` | Minimum matching window size. | `1` | -| `--speculative-ngram-max-match-window-size` | Maximum matching window size. | `12` | +| `--speculative-num-draft-tokens` | Number of draft tokens verified per step. If omitted, defaults to `12`. | `12` | | `--speculative-ngram-min-bfs-breadth` | Minimum BFS breadth. | `1` | | `--speculative-ngram-max-bfs-breadth` | Maximum BFS breadth. | `10` | | `--speculative-ngram-match-type` | Ngram tree-building mode: `"BFS"` for recency-based expansion or `"PROB"` for frequency-based expansion. | `"BFS"` | -| `--speculative-ngram-max-trie-depth` | The max trie depth for ngram speculative decoding. | `18` | +| `--speculative-ngram-max-trie-depth` | Maximum suffix length stored and matched by the ngram trie. | `18` | | `--speculative-ngram-capacity` | Cache capacity (number of entries). 
| `10,000,000` | Notes: @@ -408,7 +406,6 @@ python3 -m sglang.launch_server \ --model Qwen/Qwen2.5-7B-Instruct \ --speculative-algorithm NGRAM \ --speculative-num-draft-tokens 16 \ - --speculative-ngram-max-match-window-size 12 \ --speculative-ngram-max-bfs-breadth 10 \ --mem-fraction-static 0.7 \ --cuda-graph-max-bs 8 \ @@ -464,12 +461,10 @@ Below is a comprehensive list of all speculative decoding parameters available i | Parameter | Type | Default | Description | |---|---|---|---| -| `--speculative-ngram-min-match-window-size` | `int` | `1` | Minimum ngram matching window | -| `--speculative-ngram-max-match-window-size` | `int` | `12` | Maximum ngram matching window | | `--speculative-ngram-min-bfs-breadth` | `int` | `1` | Minimum BFS breadth | | `--speculative-ngram-max-bfs-breadth` | `int` | `10` | Maximum BFS breadth | | `--speculative-ngram-match-type` | `str` | `"BFS"` | Ngram tree-building mode: `"BFS"` for recency-based expansion or `"PROB"` for frequency-based expansion | -| `--speculative-ngram-max-trie-depth` | `int` | `18` | Max trie depth for ngram speculative decoding | +| `--speculative-ngram-max-trie-depth` | `int` | `18` | Maximum suffix length stored and matched by the ngram trie | | `--speculative-ngram-capacity` | `int` | `10,000,000` | Cache capacity | ### Environment variables diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index a79026ae24f8..244e3d96ea71 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -506,8 +506,6 @@ class ServerArgs: speculative_draft_model_quantization: Optional[str] = None # Speculative decoding (ngram) - speculative_ngram_min_match_window_size: int = 1 - speculative_ngram_max_match_window_size: int = 12 speculative_ngram_min_bfs_breadth: int = 1 speculative_ngram_max_bfs_breadth: int = 10 speculative_ngram_match_type: Literal["BFS", "PROB"] = "BFS" @@ -3108,8 +3106,10 @@ def _handle_speculative_decoding(self): self.enable_mixed_chunk = False 
self.speculative_eagle_topk = self.speculative_ngram_max_bfs_breadth if self.speculative_num_draft_tokens is None: - self.speculative_num_draft_tokens = ( - self.speculative_ngram_max_match_window_size + self.speculative_num_draft_tokens = 12 + logger.warning( + "speculative_num_draft_tokens is set to 12 by default for ngram speculative decoding. " + "You can override this by explicitly setting --speculative-num-draft-tokens." ) logger.warning( "The overlap scheduler and mixed chunked prefill are disabled because of " @@ -4851,18 +4851,6 @@ def add_cli_args(parser: argparse.ArgumentParser): ) # Speculative decoding (ngram) - parser.add_argument( - "--speculative-ngram-min-match-window-size", - type=int, - default=ServerArgs.speculative_ngram_min_match_window_size, - help="The minimum window size for pattern matching in ngram speculative decoding.", - ) - parser.add_argument( - "--speculative-ngram-max-match-window-size", - type=int, - default=ServerArgs.speculative_ngram_max_match_window_size, - help="The maximum window size for pattern matching in ngram speculative decoding.", - ) parser.add_argument( "--speculative-ngram-min-bfs-breadth", type=int, diff --git a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp index b1d54b964400..904782774916 100644 --- a/python/sglang/srt/speculative/cpp_ngram/ngram.cpp +++ b/python/sglang/srt/speculative/cpp_ngram/ngram.cpp @@ -13,23 +13,6 @@ Ngram::Ngram(size_t capacity, const Param& param) : param_(param) { throw std::runtime_error( "param_.max_trie_depth must be greater than 1, current value: " + std::to_string(param_.max_trie_depth)); } - if (!(param_.min_match_window_size > 0)) { - throw std::runtime_error( - "min_match_window_size must be greater than 0, current value: " + std::to_string(param_.min_match_window_size)); - } - if (!(param_.min_match_window_size <= param_.max_match_window_size)) { - throw std::runtime_error( - "min_match_window_size must be less than or 
equal to " - "max_match_window_size, current min_match_window_size: " + - std::to_string(param_.min_match_window_size) + - ", max_match_window_size: " + std::to_string(param_.max_match_window_size)); - } - if (!(param_.max_match_window_size < param_.max_trie_depth)) { - throw std::runtime_error( - "max_match_window_size must be less than max_trie_depth, current " - "max_match_window_size: " + - std::to_string(param_.max_match_window_size) + ", max_trie_depth: " + std::to_string(param_.max_trie_depth)); - } if (!(param_.min_bfs_breadth > 0)) { throw std::runtime_error( "min_bfs_breadth must be greater than 0, current value: " + std::to_string(param_.min_bfs_breadth)); @@ -53,20 +36,6 @@ Ngram::Ngram(size_t capacity, const Param& param) : param_(param) { } } } - for (auto config : param_.batch_min_match_window_size) { - if (config != std::numeric_limits::max()) { - if (!(config >= param_.min_match_window_size)) { - throw std::runtime_error( - "batch_min_match_window_size config value " + std::to_string(config) + - " must be greater than or equal to min_match_window_size: " + std::to_string(param_.min_match_window_size)); - } - if (!(config <= param_.max_match_window_size)) { - throw std::runtime_error( - "batch_min_match_window_size config value " + std::to_string(config) + - " must be less than or equal to max_match_window_size: " + std::to_string(param_.max_match_window_size)); - } - } - } trie_ = std::make_unique(capacity, param_); diff --git a/python/sglang/srt/speculative/cpp_ngram/ngram_corpus.py b/python/sglang/srt/speculative/cpp_ngram/ngram_corpus.py index e44a3da6b2ec..f35e9acf95fe 100644 --- a/python/sglang/srt/speculative/cpp_ngram/ngram_corpus.py +++ b/python/sglang/srt/speculative/cpp_ngram/ngram_corpus.py @@ -26,8 +26,6 @@ class NgramCorpus: def __init__( self, max_trie_depth=18, - min_match_window_size=1, - max_match_window_size=10, min_bfs_breadth=1, max_bfs_breadth=8, draft_token_num=8, @@ -36,8 +34,6 @@ def __init__( ): param = 
ngram_corpus_cpp.Param() param.max_trie_depth = max_trie_depth - param.min_match_window_size = min_match_window_size - param.max_match_window_size = max_match_window_size param.min_bfs_breadth = min_bfs_breadth param.max_bfs_breadth = max_bfs_breadth param.draft_token_num = draft_token_num diff --git a/python/sglang/srt/speculative/cpp_ngram/ngram_corpus_binding.cpp b/python/sglang/srt/speculative/cpp_ngram/ngram_corpus_binding.cpp index 8da395440293..e632dfb3de59 100644 --- a/python/sglang/srt/speculative/cpp_ngram/ngram_corpus_binding.cpp +++ b/python/sglang/srt/speculative/cpp_ngram/ngram_corpus_binding.cpp @@ -21,17 +21,12 @@ PYBIND11_MODULE(ngram_corpus_cpp, m) { .def_readwrite("enable_router_mode", &Param::enable_router_mode) .def_readwrite("min_bfs_breadth", &Param::min_bfs_breadth) .def_readwrite("max_bfs_breadth", &Param::max_bfs_breadth) - .def_readwrite("min_match_window_size", &Param::min_match_window_size) - .def_readwrite("max_match_window_size", &Param::max_match_window_size) .def_readwrite("max_trie_depth", &Param::max_trie_depth) .def_readwrite("draft_token_num", &Param::draft_token_num) .def_readwrite("match_type", &Param::match_type) - .def_readwrite("batch_min_match_window_size", &Param::batch_min_match_window_size) .def_readwrite("batch_draft_token_num", &Param::batch_draft_token_num) .def("get_draft_token_num", &Param::get_draft_token_num, "") - .def("get_min_match_window_size", &Param::get_min_match_window_size, "") .def("parse", &Param::parse, "") - .def("resetBatchMinMatchWindowSize", &Param::resetBatchMinMatchWindowSize, "") .def("resetBatchReturnTokenNum", &Param::resetBatchReturnTokenNum, "") .def("detail", &Param::detail, ""); diff --git a/python/sglang/srt/speculative/cpp_ngram/param.h b/python/sglang/srt/speculative/cpp_ngram/param.h index d31af64ba5b9..725f635db8cd 100644 --- a/python/sglang/srt/speculative/cpp_ngram/param.h +++ b/python/sglang/srt/speculative/cpp_ngram/param.h @@ -17,13 +17,10 @@ struct Param { bool 
enable_router_mode; size_t min_bfs_breadth; size_t max_bfs_breadth; - size_t min_match_window_size; - size_t max_match_window_size; size_t max_trie_depth; size_t draft_token_num; std::string match_type; - std::vector batch_min_match_window_size; std::vector batch_draft_token_num; size_t get_draft_token_num(size_t batch_size) const { @@ -36,16 +33,6 @@ struct Param { return draft_token_num - 1; } - size_t get_min_match_window_size(size_t batch_size) const { - if (batch_size < batch_min_match_window_size.size()) { - if (batch_min_match_window_size[batch_size] != - std::numeric_limits::max()) { - return batch_min_match_window_size[batch_size]; - } - } - return min_match_window_size; - } - std::vector parse(const std::string& value) { // 0-1|10,2-3|20, std::vector result; @@ -96,10 +83,6 @@ struct Param { return result; } - void resetBatchMinMatchWindowSize(const std::string& value) { - batch_min_match_window_size = parse(value); - } - void resetBatchReturnTokenNum(const std::string& value) { batch_draft_token_num = parse(value); } @@ -108,13 +91,8 @@ struct Param { std::stringstream ss; ss << "enable = " << enable << ", enable_router_mode = " << enable_router_mode << ", min_bfs_breadth = " << min_bfs_breadth << ", max_bfs_breadth = " << max_bfs_breadth - << ", min_match_window_size = " << min_match_window_size << ", max_match_window_size = " << max_match_window_size << ", max_trie_depth = " << max_trie_depth << ", draft_token_num = " << draft_token_num << ", match_type = " << match_type; - ss << ", batch_min_match_window_size(" << batch_min_match_window_size.size() << ") = "; - for (int i = 0; i < batch_min_match_window_size.size(); ++i) { - ss << i << "|" << batch_min_match_window_size[i] << ","; - } ss << ", batch_draft_token_num(" << batch_draft_token_num.size() << ") = "; for (int i = 0; i < batch_draft_token_num.size(); ++i) { ss << i << "|" << batch_draft_token_num[i] << ","; diff --git a/python/sglang/srt/speculative/cpp_ngram/trie.cpp 
b/python/sglang/srt/speculative/cpp_ngram/trie.cpp index 8d9eec82b97e..67058eccb589 100644 --- a/python/sglang/srt/speculative/cpp_ngram/trie.cpp +++ b/python/sglang/srt/speculative/cpp_ngram/trie.cpp @@ -19,7 +19,7 @@ Trie::Trie(size_t capacity, const Param& param) : param_(param) { } void Trie::insert(const int32_t* tokens, size_t len) { - for (size_t i = 0; i + param_.min_match_window_size < len; ++i) { + for (size_t i = 0; i < len; ++i) { auto start = tokens + i; auto end = start + std::min(len - i, param_.max_trie_depth); @@ -100,14 +100,13 @@ void Trie::reset() { root_ = getNode(); } -std::vector<std::pair<TrieNode*, int32_t>> -Trie::match(const int32_t* context, size_t len, size_t min_window, size_t max_window) const { +std::vector<std::pair<TrieNode*, int32_t>> Trie::match(const int32_t* context, size_t len) const { std::vector<std::pair<TrieNode*, int32_t>> result; - result.reserve(max_window - min_window); - for (int32_t match_window_size = std::min(len, max_window); match_window_size >= static_cast<int32_t>(min_window); - --match_window_size) { - auto start = context + len - match_window_size; - auto end = start + match_window_size; + const auto max_match_depth = std::min(len, param_.max_trie_depth); + result.reserve(max_match_depth); + for (size_t match_depth = max_match_depth; match_depth > 0; --match_depth) { + auto start = context + len - match_depth; + auto end = start + match_depth; auto cursor = root_; while (start != end) { auto iter = cursor->child.find(*start); @@ -118,8 +117,8 @@ Trie::match(const int32_t* context, size_t len, size_t min_window, size_t max_wi ++start; cursor = iter->second; } - if (cursor) { - result.emplace_back(std::make_pair(cursor, match_window_size)); + if (cursor != nullptr && !cursor->child.empty()) { + result.emplace_back(cursor, static_cast<int32_t>(match_depth)); } } return result; @@ -127,10 +126,10 @@ Result Trie::buildRecency( const int32_t* context, size_t len, int32_t last_token, size_t draft_token_num, const Param& param) const { - auto 
anchors = match(context, len, param.min_match_window_size, param.max_match_window_size); + auto anchors = match(context, len); - double bfs_breadth_scale = double(param.max_bfs_breadth - param.min_bfs_breadth) / - (param.max_match_window_size - param.min_match_window_size + 1); + const auto max_match_depth = std::max(1, static_cast<int>(param.max_trie_depth - 1)); + double bfs_breadth_scale = double(param.max_bfs_breadth - param.min_bfs_breadth) / max_match_depth; std::vector tree(draft_token_num + 1); int root = 0; @@ -138,7 +137,7 @@ Result Trie::buildRecency( for (auto [node, depth] : anchors) { std::queue<std::tuple<int, double, TrieNode*>> queue; - queue.push({root, (param.max_match_window_size - depth) * bfs_breadth_scale + param.min_bfs_breadth, node}); + queue.push({root, (max_match_depth - depth) * bfs_breadth_scale + param.min_bfs_breadth, node}); while (queue.size() && cursor <= static_cast<int>(draft_token_num)) { auto front = queue.front(); queue.pop(); @@ -168,7 +167,7 @@ Result Trie::buildRecency( Result Trie::buildFrequency( const int32_t* context, size_t len, int32_t last_token, size_t draft_token_num, const Param& param) const { - auto anchors = match(context, len, param.min_match_window_size, param.max_match_window_size); + auto anchors = match(context, len); struct CompareByLastDouble { bool operator()( diff --git a/python/sglang/srt/speculative/cpp_ngram/trie.h b/python/sglang/srt/speculative/cpp_ngram/trie.h index 30db5b29400c..41fd6e54ceb2 100644 --- a/python/sglang/srt/speculative/cpp_ngram/trie.h +++ b/python/sglang/srt/speculative/cpp_ngram/trie.h @@ -49,8 +49,7 @@ class Trie { void reset(); private: - std::vector<std::pair<TrieNode*, int32_t>> - match(const int32_t* context, size_t len, size_t min_window, size_t max_window) const; + std::vector<std::pair<TrieNode*, int32_t>> match(const int32_t* context, size_t len) const; TrieNode* getNode() { auto node = node_pool_[--free_node_count_]; diff --git a/python/sglang/srt/speculative/ngram_worker.py index 04a38cefbb83..8c108915c939 --- 
a/python/sglang/srt/speculative/ngram_worker.py +++ b/python/sglang/srt/speculative/ngram_worker.py @@ -41,9 +41,6 @@ def __init__( self.page_size = server_args.page_size self.draft_token_num: int = server_args.speculative_num_draft_tokens self.max_trie_depth: int = server_args.speculative_ngram_max_trie_depth - self.max_match_window_size: int = ( - server_args.speculative_ngram_max_match_window_size - ) self.max_batch_size = target_worker.max_running_requests self.device = f"cuda:{gpu_id}" if gpu_id >= 0 else "cuda" @@ -51,8 +48,6 @@ def __init__( self._init_preallocated_tensors() self.ngram_corpus = NgramCorpus( - min_match_window_size=server_args.speculative_ngram_min_match_window_size, - max_match_window_size=server_args.speculative_ngram_max_match_window_size, min_bfs_breadth=server_args.speculative_ngram_min_bfs_breadth, max_bfs_breadth=server_args.speculative_ngram_max_bfs_breadth, match_type=server_args.speculative_ngram_match_type, @@ -131,7 +126,7 @@ def _prepare_draft_tokens( batch_tokens = [] for req in batch.reqs: check_token = self._efficient_concat_last_n( - req.origin_input_ids, req.output_ids, self.max_match_window_size + req.origin_input_ids, req.output_ids, self.max_trie_depth ) batch_tokens.append(check_token) req_drafts, mask = self.ngram_corpus.batch_get(batch_tokens) diff --git a/python/sglang/test/lora_utils.py b/python/sglang/test/lora_utils.py index 566165cfa266..9de8d1d6e300 100644 --- a/python/sglang/test/lora_utils.py +++ b/python/sglang/test/lora_utils.py @@ -768,8 +768,6 @@ def run_lora_multiple_batch_on_model_cases( else { "speculative_algorithm": "NGRAM", "speculative_num_draft_tokens": 5, - "speculative_ngram_min_match_window_size": 2, - "speculative_ngram_max_match_window_size": 15, } ) srt_runner = SRTRunner( diff --git a/python/sglang/test/runners.py b/python/sglang/test/runners.py index 8edd4fa802f7..c2d84ff2f85b 100644 --- a/python/sglang/test/runners.py +++ b/python/sglang/test/runners.py @@ -574,8 +574,6 @@ def __init__( 
speculative_num_steps: Optional[int] = None, speculative_eagle_topk: Optional[int] = None, speculative_num_draft_tokens: Optional[int] = None, - speculative_ngram_min_match_window_size: Optional[int] = None, - speculative_ngram_max_match_window_size: Optional[int] = None, disable_overlap_schedule: bool = False, disable_custom_all_reduce: bool = False, torchao_config: Optional[str] = None, @@ -606,12 +604,7 @@ def __init__( spec_kwargs["speculative_num_draft_tokens"] = speculative_num_draft_tokens elif speculative_algorithm == "NGRAM": spec_kwargs["speculative_algorithm"] = speculative_algorithm - spec_kwargs["speculative_ngram_min_match_window_size"] = ( - speculative_ngram_min_match_window_size - ) - spec_kwargs["speculative_ngram_max_match_window_size"] = ( - speculative_ngram_max_match_window_size - ) + spec_kwargs["speculative_num_draft_tokens"] = speculative_num_draft_tokens self.engine = Engine( model_path=model_path, diff --git a/test/registered/spec/utils/test_ngram_corpus.py b/test/registered/spec/utils/test_ngram_corpus.py index b921225ed18e..93b2d77b5ac9 100644 --- a/test/registered/spec/utils/test_ngram_corpus.py +++ b/test/registered/spec/utils/test_ngram_corpus.py @@ -12,8 +12,6 @@ def _make_corpus(match_type="BFS", **kwargs): defaults = dict( max_trie_depth=12, - min_match_window_size=1, - max_match_window_size=10, min_bfs_breadth=1, max_bfs_breadth=8, draft_token_num=8, @@ -239,9 +237,7 @@ def test_small_capacity_does_not_crash(self): self.assertEqual(len(ids), 8, "Should still produce draft_token_num outputs") def test_eviction_preserves_recent(self): - corpus = _make_corpus( - "BFS", capacity=500, max_trie_depth=6, max_match_window_size=5 - ) + corpus = _make_corpus("BFS", capacity=500, max_trie_depth=6) old_seq = list(range(1000, 1050)) corpus.batch_put([old_seq]) @@ -357,7 +353,6 @@ def test_repeated_insert_promotes_token(self): draft_token_num=2, max_bfs_breadth=1, min_bfs_breadth=1, - max_match_window_size=3, max_trie_depth=5, ) 
corpus.batch_put([[1, 2, 3, 10, 11]]) @@ -386,7 +381,6 @@ def test_most_recent_insert_selected(self): draft_token_num=2, max_bfs_breadth=1, min_bfs_breadth=1, - max_match_window_size=3, max_trie_depth=5, ) corpus.batch_put([[1, 2, 3, 10, 11]]) @@ -422,7 +416,7 @@ class TestSingleTokenContext(CustomTestCase): """Verify behavior with minimum-length context.""" def test_single_token_query(self): - corpus = _make_corpus("BFS", min_match_window_size=1) + corpus = _make_corpus("BFS") corpus.batch_put([[5, 10, 20, 30]]) corpus.synchronize() @@ -436,7 +430,7 @@ class TestLongContext(CustomTestCase): """Verify behavior when query context exceeds max_trie_depth.""" def test_context_longer_than_max_trie_depth(self): - corpus = _make_corpus("BFS", max_trie_depth=6, max_match_window_size=5) + corpus = _make_corpus("BFS", max_trie_depth=6) seq = list(range(1, 20)) corpus.batch_put([seq]) corpus.synchronize() @@ -447,6 +441,23 @@ def test_context_longer_than_max_trie_depth(self): self.assertEqual(ids_list[0], 15, "First token should be last context token") self.assertIn(16, ids_list, "Should match via suffix despite long context") + def test_matches_longest_stored_suffix(self): + corpus = _make_corpus("BFS", max_trie_depth=6, draft_token_num=4) + corpus.batch_put([[1, 2, 3, 4, 5, 6, 7]]) + corpus.batch_put([[99, 3, 4, 5, 6, 8]]) + corpus.synchronize() + + ids, _ = corpus.batch_get([[2, 3, 4, 5, 6]]) + ids_list = ids.tolist() + self.assertIn( + 7, ids_list, "Longest stored suffix should contribute a continuation" + ) + self.assertIn( + 8, + ids_list, + "Shorter matching suffixes should still contribute continuations", + ) + class TestDraftBudgetSaturation(CustomTestCase): """Verify the draft tree uses exactly draft_token_num slots.""" @@ -538,9 +549,7 @@ class TestSqueezeEvictsOld(CustomTestCase): """Verify that squeeze actually evicts old data, not just preserves recent.""" def test_old_data_evicted(self): - corpus = _make_corpus( - "BFS", capacity=150, max_trie_depth=6, 
max_match_window_size=5 - ) + corpus = _make_corpus("BFS", capacity=150, max_trie_depth=6) old_seq = list(range(5000, 5030)) corpus.batch_put([old_seq])