Commit bb35536

update
Signed-off-by: shen-shanshan <[email protected]>
1 parent: bee8f3b

2 files changed, +7 -3 lines changed

vllm/distributed/parallel_state.py

Lines changed: 3 additions & 2 deletions
@@ -1221,8 +1221,9 @@ def cleanup_dist_env_and_memory(shutdown_ray: bool = False):
         ray.shutdown()
     gc.collect()
     from vllm.platforms import current_platform
-    if not current_platform.is_cpu():
-        torch.cuda.empty_cache()
+    empty_cache = current_platform.empty_cache
+    if empty_cache is not None:
+        empty_cache()
     try:
         torch._C._host_emptyCache()
     except AttributeError:
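
Note: a minimal sketch of the optional-hook pattern this hunk adopts, assuming a simplified platform hierarchy. The Platform/CudaPlatform/CpuPlatform names below are hypothetical illustrations, not vLLM's actual interface. Each platform publishes empty_cache (and synchronize, used by the second hunk) as either a callable or None, so call sites feature-test with a None check instead of branching on a device predicate such as is_cpu():

    import torch

    class Platform:
        # Hypothetical base class: device hooks default to None so that
        # callers can feature-test with a plain None check.
        empty_cache = None   # Optional[Callable[[], None]]
        synchronize = None   # Optional[Callable[[], None]]

    class CudaPlatform(Platform):
        # CUDA exposes both hooks directly from torch.
        empty_cache = staticmethod(torch.cuda.empty_cache)
        synchronize = staticmethod(torch.cuda.synchronize)

    class CpuPlatform(Platform):
        # No device cache to release and no device queue to drain,
        # so both hooks stay None.
        pass

    def release_device_memory(platform: Platform) -> None:
        # Mirrors the new call site in cleanup_dist_env_and_memory().
        empty_cache = platform.empty_cache
        if empty_cache is not None:
            empty_cache()

Compared with the old is_cpu() branch, this keeps device-specific knowledge inside each platform class, so supporting a new accelerator does not require touching the cleanup path.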

vllm/forward_context.py

Lines changed: 4 additions & 1 deletion
@@ -120,7 +120,10 @@ def set_forward_context(attn_metadata: Any,
             # we use synchronous scheduling right now,
             # adding a sync point here should not affect
             # scheduling of the next batch
-            torch.cuda.synchronize()
+            from vllm.platforms import current_platform
+            synchronize = current_platform.synchronize
+            if synchronize is not None:
+                synchronize()
             now = time.perf_counter()
             # time measurement is in milliseconds
             batchsize_forward_time[batchsize].append(
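
Note: the sync point here matters for the timing that follows it. A hedged sketch of why, reusing the hypothetical platform hooks from the note above (measure_forward_ms is an illustrative helper, not vLLM code): without draining the device queue first, time.perf_counter() would measure only kernel launch, not execution.

    import time

    def measure_forward_ms(run_batch, platform) -> float:
        # Hypothetical timing helper mirroring the block in
        # set_forward_context above.
        start = time.perf_counter()
        run_batch()  # may only enqueue device work asynchronously
        # Drain the device queue before reading the clock, but only on
        # platforms that provide a synchronize hook (None means skip).
        synchronize = platform.synchronize
        if synchronize is not None:
            synchronize()
        # convert to milliseconds, matching batchsize_forward_time
        return (time.perf_counter() - start) * 1000.0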
