File tree Expand file tree Collapse file tree 2 files changed +7
-1
lines changed Expand file tree Collapse file tree 2 files changed +7
-1
lines changed Original file line number Diff line number Diff line change @@ -116,15 +116,21 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
116116// Vocab utils
117117//
118118
119+ // tokenizes a string into a vector of tokens
120+ // should work similar to Python's `tokenizer.encode`
119121std::vector<llama_token> llama_tokenize (
120122 struct llama_context * ctx,
121123 const std::string & text,
122124 bool add_bos);
123125
126+ // converts a token into a piece
127+ // should work similar to Python's `tokenizer.id_to_piece`
124128std::string llama_token_to_piece (
125129 const struct llama_context * ctx,
126130 llama_token token);
127131
132+ // detokenizes a vector of tokens into a string
133+ // should work similar to Python's `tokenizer.decode`
128134// removes the leading space from the first non-BOS token
129135std::string llama_detokenize (
130136 llama_context * ctx,
Original file line number Diff line number Diff line change @@ -384,7 +384,7 @@ extern "C" {
384384 // Token Id -> Piece.
385385 // Uses the vocabulary in the provided context.
386386 // Does not write null terminator to the buffer.
387- // Use code is responsible to remove the leading whitespace of the first non-BOS token.
387+ // User code is responsible for removing the leading whitespace of the first non-BOS token when decoding multiple tokens.
388388 LLAMA_API int llama_token_to_piece (
389389 const struct llama_context * ctx,
390390 llama_token token,
You can’t perform that action at this time.
0 commit comments