Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 36 additions & 49 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import time
import warnings
from collections.abc import Callable, Iterable, Sequence
from contextlib import ExitStack, contextmanager
from contextlib import ExitStack, contextmanager, suppress
from multiprocessing import Process
from pathlib import Path
from typing import Any, Literal
Expand Down Expand Up @@ -1512,65 +1512,52 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:


def spawn_new_process_for_each_test(f: Callable[_P, None]) -> Callable[_P, None]:
"""Decorator to spawn a new process for each test function.

Uses subprocess with cloudpickle to serialize the test function and
propagates exceptions back to the parent, so test failures are never
silently swallowed (fixes https://github.com/vllm-project/vllm/issues/41415).
"""
"""Decorator to spawn a new process for each test function."""

@functools.wraps(f)
def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> None:
with tempfile.NamedTemporaryFile(delete=False, suffix=".tb", mode="wb") as tmp:
tb_file = tmp.name
# Check if we're already in a subprocess
if os.environ.get("RUNNING_IN_SUBPROCESS") == "1":
# If we are, just run the function directly
return f(*args, **kwargs)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

The decorator does not correctly handle async test functions. If f is an asynchronous function, the call f(*args, **kwargs) returns a coroutine that must be awaited. Without an event loop to run the coroutine in the subprocess, the test body will never execute. This is a critical issue for tests like test_custom_logitsprocs in tests/v1/logits_processors/test_custom_online.py, which have been converted to async in this PR.


try:
# Serialize the function + args with cloudpickle so closures work
payload = cloudpickle.dumps((f, args, kwargs, tb_file))

child_script = (
"import sys, cloudpickle, traceback\n"
"try:\n"
" from _pytest.outcomes import Skipped\n"
"except ImportError:\n"
" class Skipped(BaseException): pass\n"
"f, args, kwargs, tb_file = "
"cloudpickle.loads(sys.stdin.buffer.read())\n"
"try:\n"
" f(*args, **kwargs)\n"
"except Skipped:\n"
" sys.exit(0)\n"
"except BaseException:\n"
" open(tb_file, 'w').write(traceback.format_exc())\n"
" sys.exit(1)\n"
)
import torch.multiprocessing as mp

with suppress(RuntimeError):
mp.set_start_method("spawn")

# Get the module
module_name = f.__module__

# Create a process with environment variable set
env = os.environ.copy()
env["RUNNING_IN_SUBPROCESS"] = "1"

with tempfile.TemporaryDirectory() as tempdir:
output_filepath = os.path.join(tempdir, "new_process.tmp")

# `cloudpickle` allows pickling complex functions directly
input_bytes = cloudpickle.dumps((f, output_filepath))

repo_root = str(VLLM_PATH.resolve())
env = os.environ.copy()

env = dict(env or os.environ)
env["PYTHONPATH"] = repo_root + os.pathsep + env.get("PYTHONPATH", "")

result = subprocess.run(
[sys.executable, "-c", child_script],
input=payload,
capture_output=True,
env=env,
cmd = [sys.executable, "-m", f"{module_name}"]

returned = subprocess.run(
cmd, input=input_bytes, capture_output=True, env=env
)
Comment on lines +1540 to 1551
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

critical

This implementation of spawn_new_process_for_each_test is non-functional and re-introduces the silent failure swallowing issue:

  1. Arguments are lost: The args and kwargs passed to the wrapper are not included in the cloudpickle.dumps call at line 1540. The subprocess will not have the necessary inputs to call the function.
  2. Function is not executed: The command python -m {module_name} (line 1547) merely imports the module. It does not contain any logic to read the pickled function from stdin or invoke it.
  3. False Positives: Because the subprocess just imports the module and exits successfully, the test is marked as passed without ever running the test logic. This is likely why CI appears to be fixed by this revert—the tests are simply no longer running.


if result.returncode != 0:
# Read traceback written by child, fall back to stderr
tb = ""
if os.path.exists(tb_file) and os.path.getsize(tb_file) > 0:
with open(tb_file) as fp:
tb = fp.read()
else:
tb = result.stderr.decode()
# check if the subprocess is successful
try:
returned.check_returncode()
except Exception as e:
# wrap raised exception to provide more information
raise RuntimeError(
f"Test subprocess '{f.__name__}' failed "
f"(exit code {result.returncode}):\n{tb}"
)
finally:
with contextlib.suppress(OSError):
os.remove(tb_file)
f"Error raised in subprocess:\n{returned.stderr.decode()}"
) from e

return wrapper

Expand Down
33 changes: 0 additions & 33 deletions tests/utils_/test_spawn_decorator.py

This file was deleted.

Loading
Loading