More arguments in api_client, update docstrings (#3077)
AllentDan authored Jan 23, 2025
1 parent cceb327 commit 800b601
Showing 2 changed files with 31 additions and 4 deletions.
31 changes: 27 additions & 4 deletions lmdeploy/serve/openai/api_client.py
@@ -103,9 +103,14 @@ def chat_completions_v1(self,
frequency_penalty: Optional[float] = 0.0,
user: Optional[str] = None,
repetition_penalty: Optional[float] = 1.0,
session_id: Optional[int] = -1,
ignore_eos: Optional[bool] = False,
skip_special_tokens: Optional[bool] = True,
spaces_between_special_tokens: Optional[bool] = True,
top_k: int = 40,
min_new_tokens: Optional[int] = None,
min_p: float = 0.0,
logit_bias: Optional[Dict[str, float]] = None,
stream_options: Optional[Dict] = None,
**kwargs):
"""Chat completion v1.
@@ -128,7 +133,20 @@ def chat_completions_v1(self,
ignore_eos (bool): indicator for ignoring eos
skip_special_tokens (bool): Whether or not to remove special tokens
in the decoding. Defaults to True.
session_id (int): Deprecated.
spaces_between_special_tokens (bool): Whether or not to add spaces
around special tokens. Fast tokenizers set this to False by default,
while slow tokenizers set it to True.
top_k (int): The number of the highest probability vocabulary
tokens to keep for top-k-filtering
min_new_tokens (int): The minimum number of tokens to generate.
min_p (float): Minimum token probability, which will be scaled by the
probability of the most likely token. It must be a value between
0 and 1. Typical values are in the 0.01-0.2 range, which is comparably
selective to setting `top_p` in the 0.99-0.8 range (use the
opposite of normal `top_p` values).
logit_bias (Dict): Bias to logits. Only supported in pytorch engine.
stream_options: Options for streaming response. Only set this when you
set stream: true.
Yields:
json objects in openai formats
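For reference, a minimal usage sketch (not part of this commit) of the updated client method with the newly exposed sampling arguments. It assumes an lmdeploy api_server is already running at http://0.0.0.0:23333 and simply takes the first entry of available_models; adjust both to your setup.

from lmdeploy.serve.openai.api_client import APIClient

api_client = APIClient('http://0.0.0.0:23333')  # assumed server address
model_name = api_client.available_models[0]
messages = [{'role': 'user', 'content': 'Write a haiku about the sea.'}]
for output in api_client.chat_completions_v1(
        model=model_name,
        messages=messages,
        top_k=40,                  # keep only the 40 most likely tokens
        min_p=0.05,                # scaled minimum token probability
        min_new_tokens=8,          # generate at least 8 tokens
        logit_bias={'100': -2.0},  # token id -> bias; pytorch engine only
        skip_special_tokens=True):
    print(output)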
@@ -229,9 +247,10 @@ def completions_v1(
user: Optional[str] = None,
# additional argument of lmdeploy
repetition_penalty: Optional[float] = 1.0,
session_id: Optional[int] = -1,
ignore_eos: Optional[bool] = False,
skip_special_tokens: Optional[bool] = True,
spaces_between_special_tokens: Optional[bool] = True,
stream_options: Optional[Dict] = None,
**kwargs):
"""Chat completion v1.
@@ -258,7 +277,11 @@ def completions_v1(
ignore_eos (bool): indicator for ignoring eos
skip_special_tokens (bool): Whether or not to remove special tokens
in the decoding. Defaults to True.
session_id (int): Deprecated.
spaces_between_special_tokens (bool): Whether or not to add spaces
around special tokens. Fast tokenizers set this to False by default,
while slow tokenizers set it to True.
stream_options: Options for streaming response. Only set this when you
set stream: true.
Yields:
json objects in openai formats
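Likewise, a sketch of the plain-completion client method exercising the documented decoding and streaming switches. It makes the same server assumption as above, and the stream_options payload assumes the OpenAI-style {'include_usage': True} convention.

from lmdeploy.serve.openai.api_client import APIClient

api_client = APIClient('http://0.0.0.0:23333')  # assumed server address
model_name = api_client.available_models[0]
for output in api_client.completions_v1(
        model=model_name,
        prompt='The capital of France is',
        max_tokens=32,
        skip_special_tokens=True,            # drop special tokens when decoding
        spaces_between_special_tokens=True,  # slow-tokenizer style spacing
        stream=True,
        stream_options={'include_usage': True}):  # only valid with stream=True
    print(output)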
4 changes: 4 additions & 0 deletions lmdeploy/serve/openai/api_server.py
@@ -273,6 +273,8 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Request = None
- n (int): How many chat completion choices to generate for each input
message. **Only support one here**.
- stream: whether to stream the results or not. Default to false.
- stream_options: Options for streaming response. Only set this when you
set stream: true.
- max_tokens (int | None): output token nums. Default to None.
- repetition_penalty (float): The parameter for repetition penalty.
1.0 means no penalty
@@ -523,6 +525,8 @@ async def completions_v1(request: CompletionRequest, raw_request: Request = None
- n (int): How many chat completion choices to generate for each input
message. **Only support one here**.
- stream: whether to stream the results or not. Default to false.
- stream_options: Options for streaming response. Only set this when you
set stream: true.
- repetition_penalty (float): The parameter for repetition penalty.
1.0 means no penalty
- user (str): A unique identifier representing your end-user.
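Both server endpoints now document stream_options alongside stream. Below is a sketch of a raw HTTP request against /v1/chat/completions; the same field applies to /v1/completions. The server address, the model name, and the {'include_usage': True} payload are assumptions, not taken from the diff.

import requests

url = 'http://0.0.0.0:23333/v1/chat/completions'  # assumed server address
payload = {
    'model': 'internlm2',  # placeholder; query /v1/models for the served name
    'messages': [{'role': 'user', 'content': 'Hello!'}],
    'stream': True,
    'stream_options': {'include_usage': True},  # only set this when stream is true
}
with requests.post(url, json=payload, stream=True) as resp:
    for line in resp.iter_lines():  # the server streams SSE-style lines
        if line:
            print(line.decode('utf-8'))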