
Commit

Cleanup code
opus24 committed Sep 10, 2024
1 parent 30fb9b4 commit 27b0045
Showing 9 changed files with 46 additions and 170 deletions.
28 changes: 0 additions & 28 deletions examples/LPU_inference.py

This file was deleted.

2 changes: 1 addition & 1 deletion examples/lpu_client.py
@@ -29,7 +29,7 @@ def post_http_request(prompt: str,
"n": n,
"use_beam_search": False,
"temperature": 0.8,
"max_tokens": 32,
"max_tokens": 40,
"top_p": 0.95,
"top_k": 1,
"stream": stream,
14 changes: 6 additions & 8 deletions examples/mini_testbench.sh
@@ -2,13 +2,12 @@
log_sum="log/service_model_device.txt"

model_ids=("TinyLlama/TinyLlama-1.1B-Chat-v1.0") # "facebook/opt-1.3b" "huggyllama/llama-7b")
- num_devices=(1 2 4)
+ num_devices=(2)

current_datetime=$(date "+%Y-%m-%d %H:%M:%S")
echo "$current_datetime"
echo "$current_datetime" >> ${log_sum}

"""
for model_id in "${model_ids[@]}"; do
for num_device in "${num_devices[@]}"; do
#IFS='\' read -ra parts <<< "$model_id"
@@ -19,12 +18,11 @@ for model_id in "${model_ids[@]}"; do
echo "*********************************"
python lpu_inference_arg.py -m ${model_id} -n ${num_device} > log/inference_${model_name}_${num_device}.txt
echo "*********************************" >> ${log_sum}
echo "The Result of log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
echo "[Testbench] The Result of log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
tail -n 1 "log/inference_${model_name}_${num_device}.txt" >> ${log_sum}
echo "" >> ${log_sum}
done
done
"""

for model_id in "${model_ids[@]}"; do
for num_device in "${num_devices[@]}"; do
@@ -36,10 +34,10 @@ for model_id in "${model_ids[@]}"; do

# Waiting for server
while ! nc -z localhost "8000"; do
echo "Waiting for server..."
echo "[Testbench] Waiting for server..."
sleep 3
done
echo "The server is ready!"
echo "[Testbench] The server is ready!"

python lpu_client.py > log/vllm_serve_${model_name}_${num_device}.txt

@@ -49,10 +47,10 @@ for model_id in "${model_ids[@]}"; do
kill -SIGINT "$PID"
while true; do
if ps -p "$PID" > /dev/null; then
echo "Kill the process..."
echo "[Testbench] Kill the process..."
sleep 3
else
echo "Process (PID: $PID) is killed."
echo "[Testbench] Process (PID: $PID) is killed."
break
fi
done
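
An aside, not part of the diff: where nc is unavailable, the `nc -z localhost 8000` readiness poll above can be reproduced with Python's standard socket module. A sketch under that assumption, mirroring the script's port and 3-second delay:

import socket
import time

def wait_for_port(host: str = "localhost", port: int = 8000, delay: float = 3.0) -> None:
    # Equivalent of the script's `nc -z` loop: retry until a TCP connect succeeds.
    while True:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
            sock.settimeout(1.0)
            if sock.connect_ex((host, port)) == 0:
                print("[Testbench] The server is ready!")
                return
        print("[Testbench] Waiting for server...")
        time.sleep(delay)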
1 change: 0 additions & 1 deletion vllm/core/scheduler.py
@@ -610,7 +610,6 @@ def _schedule_running(
else:
#self._append_slots(seq_group, blocks_to_copy)
is_prefill = seq_group.is_prefill()
- print_logger(is_prefill)
scheduled_seq_group: ScheduledSequenceGroup = \
self._scheduled_seq_group_cache[self.cache_id].get_object()
scheduled_seq_group.seq_group = seq_group
5 changes: 0 additions & 5 deletions vllm/engine/async_llm_engine.py
@@ -806,7 +806,6 @@ def shutdown_background_loop(self) -> None:
if self._background_loop_unshielded is not None:
self._background_loop_unshielded.cancel()
self._background_loop_unshielded = None
print_logger("shutdown")
self.background_loop = None

def _init_engine(self, *args,
@@ -935,11 +934,8 @@ async def run_engine_loop(self):
asyncio.create_task(
self.engine_step(virtual_engine)))
has_requests_in_progress[virtual_engine] = True
- print_logger(has_unfinished_requests)
else:
has_requests_in_progress[virtual_engine] = False
- print_logger(has_unfinished_requests)
- #self.engine.model_executor.cleanup()
except asyncio.TimeoutError as exc:
logger.error(
"Engine iteration timed out. This should never happen!")
@@ -1226,7 +1222,6 @@ async def check_health(self) -> None:
t = time.perf_counter()
logger.debug("Starting health check...")
if self.is_stopped:
print_logger("is_stopped")
raise AsyncEngineDeadError("Background loop is stopped.")

if self.engine_use_ray:
1 change: 0 additions & 1 deletion vllm/entrypoints/api_server.py
@@ -131,7 +131,6 @@ async def run_server(args: Namespace,
ssl_cert_reqs=args.ssl_cert_reqs,
**uvicorn_kwargs,
)
print_logger("Detect crtl+C")
await shutdown_task
engine.engine.model_executor.cleanup()

2 changes: 1 addition & 1 deletion vllm/executor/lpu_executor.py
@@ -30,7 +30,7 @@ def _init_executor(self) -> None: #HJ: why not __init__ ?
self.model_config.dtype = torch.bfloat16

# Instantiate the worker and load the model to the device.
- #vLLM does not use torch distributed library to execute multi-LPU
+ # NOTE(hyunjun): vLLM does not use torch distributed library to execute multi-LPU
self.num_device = self.parallel_config.tensor_parallel_size
if self.parallel_config.tensor_parallel_size > 1:
self.parallel_config.tensor_parallel_size = 1
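
Restating the pattern in this hunk as a self-contained sketch (the ParallelConfig stand-in below is hypothetical; the real code uses vLLM's config objects): the requested tensor-parallel degree is saved before tensor_parallel_size is forced back to 1, so vLLM never initializes torch.distributed for multi-LPU runs.

from dataclasses import dataclass

@dataclass
class ParallelConfig:  # hypothetical stand-in for vLLM's ParallelConfig
    tensor_parallel_size: int = 1

def init_lpu_parallelism(parallel_config: ParallelConfig) -> int:
    # Remember how many LPUs were requested, then present a single
    # device to vLLM so torch.distributed is never set up.
    num_device = parallel_config.tensor_parallel_size
    if parallel_config.tensor_parallel_size > 1:
        parallel_config.tensor_parallel_size = 1
    return num_device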
