
Commit 9ac5394

[format] Valid para format error info (#4035)
* feat(log): add_request_and_response_log * Align error messages with OpenAI
1 parent 88ea565 commit 9ac5394

File tree

11 files changed: +435 −90 lines changed


fastdeploy/entrypoints/engine_client.py

Lines changed: 17 additions & 33 deletions
@@ -31,7 +31,12 @@
 from fastdeploy.metrics.work_metrics import work_process_metrics
 from fastdeploy.multimodal.registry import MultimodalRegistry
 from fastdeploy.platforms import current_platform
-from fastdeploy.utils import EngineError, StatefulSemaphore, api_server_logger
+from fastdeploy.utils import (
+    EngineError,
+    ParameterError,
+    StatefulSemaphore,
+    api_server_logger,
+)


 class EngineClient:
@@ -218,42 +223,21 @@ async def add_requests(self, task):
     def valid_parameters(self, data):
         """
         Validate stream options
+        Validation of the hyperparameters (top_p, seed, frequency_penalty, temperature, presence_penalty)
+        has been moved up front into ChatCompletionRequest/CompletionRequest.
         """

         if data.get("n") is not None:
             if data["n"] != 1:
-                raise ValueError("n only support 1.")
+                raise ParameterError("n", "n only support 1.")

         if data.get("max_tokens") is not None:
             if data["max_tokens"] < 1 or data["max_tokens"] >= self.max_model_len:
-                raise ValueError(f"max_tokens can be defined [1, {self.max_model_len}).")
+                raise ParameterError("max_tokens", f"max_tokens can be defined [1, {self.max_model_len}).")

         if data.get("reasoning_max_tokens") is not None:
             if data["reasoning_max_tokens"] > data["max_tokens"] or data["reasoning_max_tokens"] < 1:
-                raise ValueError("reasoning_max_tokens must be between max_tokens and 1")
-
-        if data.get("top_p") is not None:
-            if data["top_p"] > 1 or data["top_p"] < 0:
-                raise ValueError("top_p value can only be defined [0, 1].")
-
-        if data.get("frequency_penalty") is not None:
-            if not -2.0 <= data["frequency_penalty"] <= 2.0:
-                raise ValueError("frequency_penalty must be in [-2, 2]")
-
-        if data.get("temperature") is not None:
-            if data["temperature"] < 0:
-                raise ValueError("temperature must be non-negative")
-
-        if data.get("presence_penalty") is not None:
-            if not -2.0 <= data["presence_penalty"] <= 2.0:
-                raise ValueError("presence_penalty must be in [-2, 2]")
-
-        if data.get("seed") is not None:
-            if not 0 <= data["seed"] <= 922337203685477580:
-                raise ValueError("seed must be in [0, 922337203685477580]")
-
-        if data.get("stream_options") and not data.get("stream"):
-            raise ValueError("Stream options can only be defined when `stream=True`.")
+                raise ParameterError("reasoning_max_tokens", "reasoning_max_tokens must be between max_tokens and 1")

         # logprobs
         logprobs = data.get("logprobs")
@@ -263,35 +247,35 @@ def valid_parameters(self, data):
             if not self.enable_logprob:
                 err_msg = "Logprobs is disabled, please enable it in startup config."
                 api_server_logger.error(err_msg)
-                raise ValueError(err_msg)
+                raise ParameterError("logprobs", err_msg)
             top_logprobs = data.get("top_logprobs")
         elif isinstance(logprobs, int):
             top_logprobs = logprobs
         elif logprobs:
-            raise ValueError("Invalid type for 'logprobs'")
+            raise ParameterError("logprobs", "Invalid type for 'logprobs'")

         # enable_logprob
         if top_logprobs:
             if not self.enable_logprob:
                 err_msg = "Logprobs is disabled, please enable it in startup config."
                 api_server_logger.error(err_msg)
-                raise ValueError(err_msg)
+                raise ParameterError("logprobs", err_msg)

             if not isinstance(top_logprobs, int):
                 err_type = type(top_logprobs).__name__
                 err_msg = f"Invalid type for 'top_logprobs': expected int but got {err_type}."
                 api_server_logger.error(err_msg)
-                raise ValueError(err_msg)
+                raise ParameterError("top_logprobs", err_msg)

             if top_logprobs < 0:
                 err_msg = f"Invalid 'top_logprobs': must be >= 0, got {top_logprobs}."
                 api_server_logger.error(err_msg)
-                raise ValueError(err_msg)
+                raise ParameterError("top_logprobs", err_msg)

             if top_logprobs > 20:
                 err_msg = "Invalid value for 'top_logprobs': must be <= 20."
                 api_server_logger.error(err_msg)
-                raise ValueError(err_msg)
+                raise ParameterError("top_logprobs", err_msg)

     def check_health(self, time_interval_threashold=30):
         """

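The ParameterError raised above is imported from fastdeploy.utils, which is not part of this diff. A minimal sketch of how such an exception could carry the offending parameter name alongside the message, matching the way serving_chat.py later reads e.param and e.message (the real definition may differ):

```python
# Hypothetical sketch only; the actual ParameterError in fastdeploy/utils is not shown in this commit.
class ParameterError(Exception):
    """Raised when a single request parameter fails validation."""

    def __init__(self, param: str, message: str):
        super().__init__(message)
        self.param = param      # name of the offending field, surfaced as "param" in the error body
        self.message = message  # human-readable description, surfaced as "message"
```
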
fastdeploy/entrypoints/openai/api_server.py

Lines changed: 10 additions & 4 deletions
@@ -26,6 +26,7 @@
 import uvicorn
 import zmq
 from fastapi import FastAPI, HTTPException, Request
+from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, Response, StreamingResponse
 from prometheus_client import CONTENT_TYPE_LATEST


@@ -40,6 +41,7 @@
     CompletionRequest,
     CompletionResponse,
     ControlSchedulerRequest,
+    ErrorInfo,
     ErrorResponse,
     ModelList,
 )
@@ -56,6 +58,7 @@
 )
 from fastdeploy.metrics.trace_util import fd_start_span, inject_to_metadata, instrument
 from fastdeploy.utils import (
+    ExceptionHandler,
     FlexibleArgumentParser,
     StatefulSemaphore,
     api_server_logger,
@@ -232,6 +235,8 @@ async def lifespan(app: FastAPI):


 app = FastAPI(lifespan=lifespan)
+app.add_exception_handler(RequestValidationError, ExceptionHandler.handle_request_validation_exception)
+app.add_exception_handler(Exception, ExceptionHandler.handle_exception)
 instrument(app)

@@ -336,7 +341,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
     if isinstance(generator, ErrorResponse):
         api_server_logger.debug(f"release: {connection_semaphore.status()}")
         connection_semaphore.release()
-        return JSONResponse(content={"detail": generator.model_dump()}, status_code=generator.code)
+        return JSONResponse(content=generator.model_dump(), status_code=500)
     elif isinstance(generator, ChatCompletionResponse):
         api_server_logger.debug(f"release: {connection_semaphore.status()}")
         connection_semaphore.release()
@@ -365,7 +370,7 @@ async def create_completion(request: CompletionRequest):
     generator = await app.state.completion_handler.create_completion(request)
     if isinstance(generator, ErrorResponse):
         connection_semaphore.release()
-        return JSONResponse(content=generator.model_dump(), status_code=generator.code)
+        return JSONResponse(content=generator.model_dump(), status_code=500)
     elif isinstance(generator, CompletionResponse):
         connection_semaphore.release()
         return JSONResponse(content=generator.model_dump())
@@ -388,7 +393,7 @@ async def list_models() -> Response:

     models = await app.state.model_handler.list_models()
     if isinstance(models, ErrorResponse):
-        return JSONResponse(content=models.model_dump(), status_code=models.code)
+        return JSONResponse(content=models.model_dump())
     elif isinstance(models, ModelList):
         return JSONResponse(content=models.model_dump())


@@ -502,7 +507,8 @@ def control_scheduler(request: ControlSchedulerRequest):
     """
     Control the scheduler behavior with the given parameters.
     """
-    content = ErrorResponse(object="", message="Scheduler updated successfully", code=0)
+
+    content = ErrorResponse(error=ErrorInfo(message="Scheduler updated successfully", code=0))

     global llm_engine
     if llm_engine is None:
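
ExceptionHandler is likewise imported from fastdeploy.utils and its body is not shown in this commit. A rough sketch, under that caveat, of how a RequestValidationError handler could map a Pydantic validation failure onto the new OpenAI-style error envelope defined in protocol.py (status codes, wording, and the "invalid_request_error"/"server_error" strings are assumptions, not the project's actual values):

```python
# Assumed sketch only: the real ExceptionHandler in fastdeploy/utils is not part of this diff.
from fastapi import Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse


class ExceptionHandler:
    @staticmethod
    async def handle_request_validation_exception(request: Request, exc: RequestValidationError) -> JSONResponse:
        # Report the first failing field in an OpenAI-style error envelope.
        errors = exc.errors()
        first = errors[0] if errors else {}
        param = ".".join(str(loc) for loc in first.get("loc", []) if loc != "body")
        return JSONResponse(
            status_code=400,
            content={
                "error": {
                    "message": first.get("msg", "Invalid request"),
                    "type": "invalid_request_error",
                    "param": param or None,
                    "code": None,
                }
            },
        )

    @staticmethod
    async def handle_exception(request: Request, exc: Exception) -> JSONResponse:
        # Catch-all: wrap unexpected failures in the same envelope with a 500 status.
        return JSONResponse(
            status_code=500,
            content={"error": {"message": str(exc), "type": "server_error", "param": None, "code": None}},
        )
```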

fastdeploy/entrypoints/openai/protocol.py

Lines changed: 18 additions & 13 deletions
@@ -32,9 +32,14 @@ class ErrorResponse(BaseModel):
     Error response from OpenAI API.
     """

-    object: str = "error"
+    error: ErrorInfo
+
+
+class ErrorInfo(BaseModel):
     message: str
-    code: int
+    type: Optional[str] = None
+    param: Optional[str] = None
+    code: Optional[str] = None


 class PromptTokenUsageInfo(BaseModel):
@@ -403,21 +408,21 @@ class CompletionRequest(BaseModel):
     prompt: Union[List[int], List[List[int]], str, List[str]]
     best_of: Optional[int] = None
     echo: Optional[bool] = False
-    frequency_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = Field(default=None, ge=-2, le=2)
     logprobs: Optional[int] = None
     # For logits and logprobs post processing
     temp_scaled_logprobs: bool = False
     top_p_normalized_logprobs: bool = False
     max_tokens: Optional[int] = None
-    n: int = 1
-    presence_penalty: Optional[float] = None
-    seed: Optional[int] = None
+    n: Optional[int] = 1
+    presence_penalty: Optional[float] = Field(default=None, ge=-2, le=2)
+    seed: Optional[int] = Field(default=None, ge=0, le=922337203685477580)
     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
     stream: Optional[bool] = False
     stream_options: Optional[StreamOptions] = None
     suffix: Optional[dict] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
+    temperature: Optional[float] = Field(default=None, ge=0)
+    top_p: Optional[float] = Field(default=None, ge=0, le=1)
     user: Optional[str] = None

     # doc: begin-completion-sampling-params
@@ -537,7 +542,7 @@ class ChatCompletionRequest(BaseModel):
     messages: Union[List[Any], List[int]]
     tools: Optional[List[ChatCompletionToolsParam]] = None
     model: Optional[str] = "default"
-    frequency_penalty: Optional[float] = None
+    frequency_penalty: Optional[float] = Field(None, le=2, ge=-2)
     logprobs: Optional[bool] = False
     top_logprobs: Optional[int] = 0

@@ -552,13 +557,13 @@
     )
     max_completion_tokens: Optional[int] = None
     n: Optional[int] = 1
-    presence_penalty: Optional[float] = None
-    seed: Optional[int] = None
+    presence_penalty: Optional[float] = Field(None, le=2, ge=-2)
+    seed: Optional[int] = Field(default=None, ge=0, le=922337203685477580)
     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
     stream: Optional[bool] = False
     stream_options: Optional[StreamOptions] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
+    temperature: Optional[float] = Field(None, ge=0)
+    top_p: Optional[float] = Field(None, le=1, ge=0)
     user: Optional[str] = None
     metadata: Optional[dict] = None
     response_format: Optional[AnyResponseFormat] = None
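
With the range checks expressed as Field constraints, an out-of-range value is rejected by Pydantic while the request body is being parsed, and therefore surfaces through the RequestValidationError handler registered in api_server.py instead of reaching valid_parameters. A minimal standalone illustration of the ge/le behavior (not FastDeploy code, just the constraint pattern used above):

```python
# Standalone illustration of the ge/le constraints added above; not FastDeploy code.
from typing import Optional

from pydantic import BaseModel, Field, ValidationError


class SamplingParams(BaseModel):
    temperature: Optional[float] = Field(default=None, ge=0)
    top_p: Optional[float] = Field(default=None, ge=0, le=1)
    frequency_penalty: Optional[float] = Field(default=None, ge=-2, le=2)


try:
    SamplingParams(top_p=1.5)  # violates le=1
except ValidationError as e:
    # Pydantic identifies the offending field, e.g. ('top_p',) "... less than or equal to 1"
    print(e.errors()[0]["loc"], e.errors()[0]["msg"])
```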

fastdeploy/entrypoints/openai/serving_chat.py

Lines changed: 18 additions & 10 deletions
@@ -30,6 +30,7 @@
     ChatCompletionStreamResponse,
     ChatMessage,
     DeltaMessage,
+    ErrorInfo,
     ErrorResponse,
     LogProbEntry,
     LogProbs,
@@ -38,7 +39,7 @@
 )
 from fastdeploy.entrypoints.openai.response_processors import ChatResponseProcessor
 from fastdeploy.metrics.work_metrics import work_process_metrics
-from fastdeploy.utils import api_server_logger
+from fastdeploy.utils import ErrorCode, ErrorType, ParameterError, api_server_logger
 from fastdeploy.worker.output import LogprobsLists


@@ -86,14 +87,16 @@ async def create_chat_completion(self, request: ChatCompletionRequest):
                 f"Only master node can accept completion request, please send request to master node: {self.master_ip}"
             )
             api_server_logger.error(err_msg)
-            return ErrorResponse(message=err_msg, code=400)
+            return ErrorResponse(error=ErrorInfo(message=err_msg, type=ErrorType.SERVER_ERROR))

         if self.models:
             is_supported, request.model = self.models.is_supported_model(request.model)
             if not is_supported:
                 err_msg = f"Unsupported model: [{request.model}], support [{', '.join([x.name for x in self.models.model_paths])}] or default"
                 api_server_logger.error(err_msg)
-                return ErrorResponse(message=err_msg, code=400)
+                return ErrorResponse(
+                    error=ErrorInfo(message=err_msg, type=ErrorType.SERVER_ERROR, code=ErrorCode.MODEL_NOT_SUPPORT)
+                )

         try:
             if self.max_waiting_time < 0:
@@ -117,11 +120,17 @@ async def create_chat_completion(self, request: ChatCompletionRequest):
             text_after_process = current_req_dict.get("text_after_process")
             if isinstance(prompt_token_ids, np.ndarray):
                 prompt_token_ids = prompt_token_ids.tolist()
+        except ParameterError as e:
+            api_server_logger.error(e.message)
+            self.engine_client.semaphore.release()
+            return ErrorResponse(
+                error=ErrorInfo(message=str(e.message), type=ErrorType.INVALID_REQUEST_ERROR, param=e.param)
+            )
         except Exception as e:
             error_msg = f"request[{request_id}] generator error: {str(e)}, {str(traceback.format_exc())}"
             api_server_logger.error(error_msg)
             self.engine_client.semaphore.release()
-            return ErrorResponse(code=400, message=error_msg)
+            return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.INVALID_REQUEST_ERROR))
         del current_req_dict

         if request.stream:
@@ -136,21 +145,20 @@ async def create_chat_completion(self, request: ChatCompletionRequest):
             except Exception as e:
                 error_msg = f"request[{request_id}]full generator error: {str(e)}, {str(traceback.format_exc())}"
                 api_server_logger.error(error_msg)
-                return ErrorResponse(code=408, message=error_msg)
+                return ErrorResponse(error=ErrorInfo(message=error_msg, type=ErrorType.SERVER_ERROR))
         except Exception as e:
             error_msg = (
                 f"request[{request_id}] waiting error: {str(e)}, {str(traceback.format_exc())}, "
                 f"max waiting time: {self.max_waiting_time}"
             )
             api_server_logger.error(error_msg)
-            return ErrorResponse(code=408, message=error_msg)
+            return ErrorResponse(
+                error=ErrorInfo(message=error_msg, type=ErrorType.TIMEOUT_ERROR, code=ErrorCode.TIMEOUT)
+            )

     def _create_streaming_error_response(self, message: str) -> str:
         api_server_logger.error(message)
-        error_response = ErrorResponse(
-            code=400,
-            message=message,
-        )
+        error_response = ErrorResponse(error=ErrorInfo(message=message, type=ErrorType.SERVER_ERROR))
         return error_response.model_dump_json()

     async def chat_completion_stream_generator(
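
Taken together, an invalid parameter now surfaces as an OpenAI-style error body instead of a bare "detail" string. A sketch of the resulting shape for the ParameterError path above; the concrete "type" and "code" strings come from the ErrorType/ErrorCode enums in fastdeploy.utils, which this commit does not show, so the values below are assumed:

```python
# Illustrative response body only; "type" is an assumed value of ErrorType.INVALID_REQUEST_ERROR.
example_error_body = {
    "error": {
        "message": "n only support 1.",
        "type": "invalid_request_error",
        "param": "n",
        "code": None,
    }
}
```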
