Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions benchmark/prefill_only/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ async def make_http_call(
api_name: Name of the API for error messages
"""
try:
start_time = asyncio.get_event_loop().time()
start_time = asyncio.get_running_loop().time()

request_json = build_http_request_json(request_data)
headers = {"Content-Type": "application/json"}
Expand All @@ -253,7 +253,7 @@ async def make_http_call(
f"[HTTP] {api_name} Request {request_id} failed with status "
f"{resp.status}: {resp_text}"
)
completion_time = asyncio.get_event_loop().time()
completion_time = asyncio.get_running_loop().time()
await results_queue.put((request_id, 0, False, completion_time))
return

Expand All @@ -271,13 +271,13 @@ async def make_http_call(
)
success = False

completion_time = asyncio.get_event_loop().time()
completion_time = asyncio.get_running_loop().time()
elapsed_time = (completion_time - start_time) * 1000
await results_queue.put((request_id, elapsed_time, success, completion_time))

except Exception as e:
print(f"[HTTP] {api_name} Error for request {request_id}: {e}")
completion_time = asyncio.get_event_loop().time()
completion_time = asyncio.get_running_loop().time()
await results_queue.put((request_id, 0, False, completion_time))


Expand Down Expand Up @@ -738,7 +738,7 @@ async def run_generic_benchmark(
tasks = []

# Track timing for sending requests
send_start_time = asyncio.get_event_loop().time()
send_start_time = asyncio.get_running_loop().time()

# HTTP implementation
async with aiohttp.ClientSession(
Expand Down Expand Up @@ -778,7 +778,7 @@ async def run_generic_benchmark(
if i < len(all_requests) - 1:
await sleep_with_distribution(config.distribution, rps)

send_end_time = asyncio.get_event_loop().time()
send_end_time = asyncio.get_running_loop().time()
send_duration = send_end_time - send_start_time

# Wait for all requests to complete with progress tracking
Expand All @@ -796,7 +796,7 @@ async def run_generic_benchmark(
if config.profile:
await send_profile_request("STOP_PROFILE", http_url, session=session)

completion_end_time = asyncio.get_event_loop().time()
completion_end_time = asyncio.get_running_loop().time()
total_duration = completion_end_time - send_start_time

return await process_results(
Expand Down
2 changes: 1 addition & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ dependencies = [
"tqdm",
"transformers==4.57.1",
"uvicorn",
"uvloop==0.21.0",
"uvloop",
"xgrammar==0.1.27",
"grpcio==1.75.1", # keep it align with compile_proto.py
"grpcio-tools==1.75.1", # keep it align with compile_proto.py
Expand Down
3 changes: 1 addition & 2 deletions python/sglang/srt/entrypoints/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,7 @@ def update_weights_from_ipc(
zmq_handles=zmq_handles,
flush_cache=flush_cache,
)
loop = asyncio.get_event_loop()
return loop.run_until_complete(
return self.loop.run_until_complete(
self.tokenizer_manager.update_weights_from_ipc(obj, None)
)

Expand Down
4 changes: 2 additions & 2 deletions python/sglang/srt/grpc/grpc_request_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
TokenizedGenerateReqInput,
)
from sglang.srt.server_args import PortArgs, ServerArgs
from sglang.srt.utils import get_zmq_socket, kill_process_tree
from sglang.srt.utils import get_or_create_event_loop, get_zmq_socket, kill_process_tree
from sglang.utils import get_exception_traceback

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -876,7 +876,7 @@ def auto_create_handle_loop(self):
return

self.no_create_loop = True
loop = asyncio.get_event_loop()
loop = get_or_create_event_loop()
self.asyncio_tasks.add(
loop.create_task(print_exception_wrapper(self.handle_loop))
)
Expand Down
8 changes: 5 additions & 3 deletions python/sglang/srt/managers/tokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@
dataclass_to_string_truncated,
freeze_gc,
get_bool_env_var,
get_or_create_event_loop,
get_zmq_socket,
kill_process_tree,
)
Expand Down Expand Up @@ -1354,12 +1355,13 @@ async def abort_request():

def auto_create_handle_loop(self):
if self._chosen_loop is not None:
current_loop = get_or_create_event_loop()
assert (
asyncio.get_event_loop() == self._chosen_loop
), f"Please ensure only one event loop is ever used with SGLang. Previous loop: {self._chosen_loop}, current loop: {asyncio.get_event_loop()}"
current_loop == self._chosen_loop
), f"Please ensure only one event loop is ever used with SGLang. Previous loop: {self._chosen_loop}, current loop: {current_loop}"
return

loop = asyncio.get_event_loop()
loop = get_or_create_event_loop()
self._chosen_loop = loop
self.asyncio_tasks.add(
loop.create_task(print_exception_wrapper(self.handle_loop))
Expand Down
2 changes: 1 addition & 1 deletion python/sglang/srt/multimodal/processors/llava.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def _process_single_image(
grid_pinpoints: str,
):
if self.cpu_executor is not None:
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
return await loop.run_in_executor(
self.cpu_executor,
LlavaImageProcessor._process_single_image_task,
Expand Down
10 changes: 10 additions & 0 deletions python/sglang/srt/utils/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,16 @@
time_infos = {}


def get_or_create_event_loop():
"""Gets the running event loop or creates a new one if it doesn't exist."""
try:
return asyncio.get_running_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
return loop


HIP_FP8_E4M3_FNUZ_MAX = 224.0


Expand Down
3 changes: 2 additions & 1 deletion python/sglang/test/few_shot_gsm8k_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np

import sglang as sgl
from sglang.srt.utils import get_or_create_event_loop
from sglang.utils import download_and_cache_file, read_jsonl

INVALID = -9999999
Expand Down Expand Up @@ -89,7 +90,7 @@ def run_eval(args):
# Run requests
tic = time.perf_counter()

loop = asyncio.get_event_loop()
loop = get_or_create_event_loop()

outputs = loop.run_until_complete(
concurrent_generate(engine, prompts, sampling_param)
Expand Down
6 changes: 3 additions & 3 deletions test/srt/openai_server/basic/test_serving_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
python -m unittest discover -s tests -p "test_*unit.py" -v
"""

import asyncio
import json
import unittest
import uuid
Expand All @@ -21,6 +20,7 @@
)
from sglang.srt.entrypoints.openai.serving_chat import OpenAIServingChat
from sglang.srt.managers.io_struct import GenerateReqInput
from sglang.srt.utils import get_or_create_event_loop


class _MockTokenizerManager:
Expand Down Expand Up @@ -389,7 +389,7 @@ async def collect_first_tool_chunk():
break
return line

loop = asyncio.get_event_loop()
loop = get_or_create_event_loop()
line = loop.run_until_complete(collect_first_tool_chunk())
self.assertIsNotNone(line)
self.assertTrue(line.startswith("data: "))
Expand Down Expand Up @@ -564,7 +564,7 @@ async def collect_first_tool_chunk():
break
return line

loop = asyncio.get_event_loop()
loop = get_or_create_event_loop()
line = loop.run_until_complete(collect_first_tool_chunk())
self.assertIsNotNone(line)
self.assertTrue(line.startswith("data: "))
Expand Down
Loading