1010class NgramProposer :
1111
1212 def __init__ (self , vllm_config : VllmConfig ):
13- self .vllm_config = vllm_config
13+ # Minimum length of the n-gram to match.
14+ self .min_n = vllm_config .speculative_config .prompt_lookup_min
15+ # Maximum length of the n-gram to match.
16+ self .max_n = vllm_config .speculative_config .prompt_lookup_max
17+ # Number of tokens that follow the match. If fewer than k
18+ # tokens follow the match, we return as many tokens as
19+ # remain until the end.
20+ self .k = vllm_config .speculative_config .num_speculative_tokens
21+ # Trigger Numba JIT compilation for N-gram proposer.
22+ # This usually takes less than 1 second.
23+ self .propose (np .zeros (1024 , dtype = np .int32 ))
1424
1525 def propose (
1626 self ,
1727 context_token_ids : np .ndarray ,
18- min_n : int ,
19- max_n : int ,
20- k : int ,
2128 ) -> Optional [np .ndarray ]:
2229 """Proposes the next sequence of tokens based on n-gram pattern
2330 matching in the context. The function finds matches of the last n
@@ -27,17 +34,12 @@ def propose(
2734 Args:
2835 context_token_ids: Numpy array of token IDs representing the
2936 context sequence.
30- min_n: Minimum length of the n-gram to match.
31- max_n: Maximum length of the n-gram to match.
32- k: Number of tokens follow the match. If there are less
33- than k tokens follow the match, we will return
34- the maximum amount of tokens until the end.
35-
37+
3638 Returns:
3739 np.ndarray: The sequence of tokens that followed
3840 the matched n-gram in the context.
3941 None: If no matching n-gram pattern is found.
40-
42+
4143 Example:
4244 If context_token_ids = [1,2,3,4,2,3], self.min_n = 2,
4345 self.max_n = 3, and self.k = 4:
@@ -49,8 +51,8 @@ def propose(
4951 we only have three tokens after the match.
5052 """
5153 # TODO(woosuk): Optimize this.
52- for n in range (max_n , min_n - 1 , - 1 ):
53- result = _find_subarray_kmp (context_token_ids , n , k )
54+ for n in range (self . max_n , self . min_n - 1 , - 1 ):
55+ result = _find_subarray_kmp (context_token_ids , n , self . k )
5456 if result is not None :
5557 return result
5658 return None
0 commit comments