Added docker option: NICENESS of process
c0sogi committed Aug 20, 2023
1 parent 8a2a41a commit 970b1c5
Showing 5 changed files with 22 additions and 13 deletions.
2 changes: 2 additions & 0 deletions docker-compose.persistent.yml
@@ -8,6 +8,8 @@ services:
     image: cosogi/llama-api:latest
     cap_add:
       - IPC_LOCK
+      - SYS_NICE
+      - SYS_RESOURCE
     entrypoint: ["python3", "-m", "main", "--port", "8000"]
     environment:
       - FORCE_CUDA=1
2 changes: 2 additions & 0 deletions docker-compose.yml
@@ -5,6 +5,8 @@ services:
     image: cosogi/llama-api:latest
     cap_add:
      - IPC_LOCK
+      - SYS_NICE
+      - SYS_RESOURCE
     entrypoint: ["python3", "-m", "main", "--port", "8000"]
     environment:
      - FORCE_CUDA=1
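The SYS_NICE and SYS_RESOURCE capabilities added to both compose files let the containerized server process change its own scheduling priority (niceness) and resource limits, which an unprivileged container process is otherwise denied. As a rough, hypothetical sketch (the NICENESS environment variable and this wiring are assumptions based on the commit title and are not shown in this diff), an entrypoint could apply a niceness value like this:

import os

# Hypothetical: read a niceness increment from the environment and apply it.
# A negative increment (higher priority) requires CAP_SYS_NICE in the container.
niceness = os.environ.get("NICENESS")
if niceness is not None:
    try:
        os.nice(int(niceness))
    except PermissionError:
        print("CAP_SYS_NICE not granted; keeping the default niceness")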
23 changes: 11 additions & 12 deletions llama_api/modules/exllama.py
@@ -199,6 +199,16 @@ def _generate_text(
         text_buffer = "" # type: str
         byte_array = array("B") # type: array[int]
         byte_pattern = compile(r"<0x([0-9a-fA-F]{2})>")
+        logit_processors = (
+            [
+                processor
+                for processor in self.get_logit_processors(
+                    settings=settings, encoder=self.encode
+                )
+            ]
+            if cfg_mask is None
+            else None
+        ) or None
 
         for _ in range(settings.max_tokens):
             # If the generator was interrupted, stop the generation
@@ -216,18 +226,7 @@
                 else _gen_single_token_without_cfg(
                     generator=generator,
                     input_ids=generator.sequence[0][initial_len:],
-                    logit_processors=(
-                        [
-                            processor
-                            for processor in self.get_logit_processors(
-                                settings=settings,
-                                encoder=self.encode,
-                            )
-                        ]
-                        if cfg_mask is None
-                        else None
-                    )
-                    or None,
+                    logit_processors=logit_processors,
                 )
             ) # type: int

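In exllama.py, the logit-processor list is now built once before the sampling loop and reused on every iteration, instead of being rebuilt for each generated token; the trailing "or None" collapses an empty list to None so "no processors" has a single representation. A minimal, self-contained sketch of the same hoisting pattern (the names here are illustrative and not taken from the repository):

from typing import Callable, Iterable, List, Optional

def generate(
    max_tokens: int,
    build_processors: Callable[[], Iterable[Callable[[int], int]]],
    cfg_mask: Optional[object] = None,
) -> List[int]:
    # Build the loop-invariant processor list once, outside the hot loop.
    # "(...) or None" turns an empty list into None.
    processors = (list(build_processors()) if cfg_mask is None else None) or None
    out: List[int] = []
    for token in range(max_tokens):
        if processors:
            for processor in processors:
                token = processor(token)
        out.append(token)
    return out

print(generate(3, lambda: [lambda t: t * 2]))  # [0, 2, 4]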
4 changes: 3 additions & 1 deletion llama_api/server/routers/v1.py
@@ -220,10 +220,12 @@ def log_request_and_response(
         CreateCompletionRequest,
         CreateEmbeddingRequest,
     ],
-    status: Union[CompletionStatus, EmbeddingStatus],
+    status: Optional[Union[CompletionStatus, EmbeddingStatus]],
     state: Literal["Completed", "Interrupted"],
 ) -> None:
     """Log the request and response of the completion or embedding"""
+    if status is None:
+        return
     elapsed_time = time() - status.started_at
     log_messages: List[str] = [f"elapsed time: {elapsed_time: .1f}s"]
     body_without_prompt = body.model_dump(
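The v1.py change widens the status parameter to Optional and returns early when it is None, so a request whose status was never recorded is skipped instead of raising an AttributeError on status.started_at. A small standalone sketch of the same guard, using a simplified stand-in type rather than the project's CompletionStatus/EmbeddingStatus classes:

from dataclasses import dataclass
from time import time
from typing import Optional

@dataclass
class Status:  # simplified stand-in for CompletionStatus / EmbeddingStatus
    started_at: float

def log_elapsed(status: Optional[Status], state: str) -> None:
    if status is None:  # nothing was recorded for this request
        return
    elapsed_time = time() - status.started_at
    print(f"{state}: elapsed time: {elapsed_time: .1f}s")

log_elapsed(None, "Interrupted")                           # silently skipped
log_elapsed(Status(started_at=time() - 2.0), "Completed")  # logs ~2.0s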
4 changes: 4 additions & 0 deletions llama_api/utils/errors.py
@@ -247,6 +247,10 @@ async def custom_route_handler(self, request: Request) -> Response:
             status_code,
             error_message,
         ) = self.error_message_wrapper(error=error, body=body)
+        client = request.client.host if request.client else "UNKNOWN"
+        logger.error(
+            f'"{client}{request.url.path}": {error_message["message"]}'
+        )
         return JSONResponse(
             {"error": error_message},
             status_code=status_code,
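The errors.py change logs the client host and request path alongside the wrapped error message before the JSON error response is returned; request.client can be None (for example with some test clients), so the code falls back to "UNKNOWN" rather than dereferencing None. A minimal sketch of the same fallback pattern with plain types instead of FastAPI's Request object:

import logging
from typing import Optional, Tuple

logger = logging.getLogger("llama_api")

def error_origin(client: Optional[Tuple[str, int]], path: str) -> str:
    # The peer address may be missing, so substitute a placeholder
    # instead of raising an AttributeError.
    host = client[0] if client else "UNKNOWN"
    return f'"{host}{path}"'

logging.basicConfig(level=logging.ERROR)
logger.error(f"{error_origin(('127.0.0.1', 52514), '/v1/completions')}: Bad request")
logger.error(f"{error_origin(None, '/v1/completions')}: Bad request")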
