Skip to content
Merged
Show file tree
Hide file tree
Changes from 33 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
65b944e
Skip IPC mempool tests on WSL
rparolin Sep 29, 2025
67d5f45
Merge branch 'main' into rparolin/skip_ipc_on_wsl
rparolin Sep 29, 2025
47a8d89
Update cuda_core/tests/conftest.py
rparolin Sep 29, 2025
7f7be24
Update cuda_core/tests/test_ipc_mempool.py
rparolin Sep 29, 2025
d88d627
Apply suggestion from @cpcloud
rparolin Sep 29, 2025
aab97af
addressing feedback
rparolin Sep 29, 2025
fe47883
Making utils globally accessible
rparolin Sep 29, 2025
46cd33c
Merge branch 'rparolin/skip_ipc_on_wsl' of github.com:NVIDIA/cuda-pyt…
rparolin Sep 29, 2025
4366efe
working
rparolin Sep 29, 2025
2b49a77
wip
rparolin Sep 29, 2025
57ead02
wip
rparolin Sep 29, 2025
0a9be40
formatting
rparolin Sep 29, 2025
1da2c82
removing deleted files
rparolin Sep 29, 2025
f7deec9
removing skip helper
rparolin Sep 29, 2025
57fc851
Merge branch 'main' into rparolin/skip_ipc_on_wsl
rparolin Sep 29, 2025
56a2381
wip
rparolin Sep 29, 2025
5e21482
feedback
rparolin Sep 29, 2025
c95d29b
wip
rparolin Sep 29, 2025
7edd190
sorting imports
rparolin Sep 29, 2025
8ce40b8
Update cuda_core/tests/conftest.py
rparolin Oct 1, 2025
2168cd1
Checking if CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES is sup…
rparolin Oct 1, 2025
b40487d
Merge branch 'rparolin/skip_ipc_on_wsl' of github.com:NVIDIA/cuda-pyt…
rparolin Oct 1, 2025
d66d4da
Update cuda_python_test_helpers/__init__.py
rparolin Oct 2, 2025
1f9bb51
Merge branch 'main' into rparolin/skip_ipc_on_wsl
rparolin Oct 2, 2025
4b9c417
merge main branch
rparolin Oct 8, 2025
8d291a6
wip
rparolin Oct 8, 2025
55546cb
Merge branch 'main' into rparolin/skip_ipc_on_wsl
leofang Oct 8, 2025
8664327
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Oct 8, 2025
7af149b
Creating the cuda_python_test_helpers as a package
rparolin Oct 8, 2025
5977cab
Merge branch 'rparolin/skip_ipc_on_wsl' of github.com:NVIDIA/cuda-pyt…
rparolin Oct 8, 2025
90590ec
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Oct 8, 2025
034125f
pre-commit changes
rparolin Oct 8, 2025
75c4d25
Merge branch 'rparolin/skip_ipc_on_wsl' of github.com:NVIDIA/cuda-pyt…
rparolin Oct 8, 2025
cd6b714
removing cuda_python_test_helpers/__init__.py
rparolin Oct 8, 2025
baac405
install cuda_python_test_helpers as editable
rparolin Oct 8, 2025
d8b0a46
Merge branch 'main' into rparolin/skip_ipc_on_wsl
leofang Oct 8, 2025
bbe82c8
do not require cuda_python_test_helpers to be pre-installed for now
leofang Oct 8, 2025
7726e05
Revert "install cuda_python_test_helpers as editable"
rparolin Oct 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions cuda_core/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import multiprocessing

import helpers
import pytest

try:
from cuda.bindings import driver
except ImportError:
from cuda import cuda as driver
import multiprocessing

import pytest
from cuda.core.experimental import Device, DeviceMemoryResource, DeviceMemoryResourceOptions, _device
from cuda.core.experimental._utils.cuda_utils import handle_return

Expand Down Expand Up @@ -85,6 +86,12 @@ def ipc_device():
if not device.properties.handle_type_posix_file_descriptor_supported:
pytest.skip("Device does not support IPC")

# Skip on WSL or if driver rejects IPC-enabled mempool creation on this platform/device
from cuda_python_test_helpers import IS_WSL, supports_ipc_mempool

if IS_WSL or not supports_ipc_mempool(device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")

return device


Expand Down
6 changes: 6 additions & 0 deletions cuda_core/tests/memory_ipc/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions
from cuda.core.experimental._utils.cuda_utils import CUDAError

from cuda_python_test_helpers import supports_ipc_mempool

CHILD_TIMEOUT_SEC = 20
NBYTES = 64
POOL_SIZE = 2097152
Expand All @@ -18,6 +20,10 @@ class ChildErrorHarness:
PARENT_ACTION, CHILD_ACTION, and ASSERT (see below for examples)."""

def test_main(self, ipc_device, ipc_memory_resource):
if not supports_ipc_mempool(ipc_device):
import pytest

pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
"""Parent process that checks child errors."""
# Attach fixtures to this object for convenience. These can be accessed
# from PARENT_ACTION.
Expand Down
14 changes: 14 additions & 0 deletions cuda_core/tests/memory_ipc/test_memory_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
from cuda.core.experimental import Buffer, DeviceMemoryResource
from utility import IPCBufferTestHelper

from cuda_python_test_helpers import supports_ipc_mempool

CHILD_TIMEOUT_SEC = 20
NBYTES = 64
NWORKERS = 2
Expand All @@ -14,6 +16,10 @@

class TestIpcMempool:
def test_main(self, ipc_device, ipc_memory_resource):
if not supports_ipc_mempool(ipc_device):
import pytest

pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
"""Test IPC with memory pools."""
# Set up the IPC-enabled memory pool and share it.
device = ipc_device
Expand Down Expand Up @@ -49,6 +55,10 @@ def child_main(self, device, mr, queue):

class TestIPCMempoolMultiple:
def test_main(self, ipc_device, ipc_memory_resource):
if not supports_ipc_mempool(ipc_device):
import pytest

pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
"""Test IPC with memory pools using multiple processes."""
# Construct an IPC-enabled memory resource and share it with two children.
device = ipc_device
Expand Down Expand Up @@ -93,6 +103,10 @@ def child_main(self, device, mr, idx, queue):

class TestIPCSharedAllocationHandleAndBufferDescriptors:
def test_main(self, ipc_device, ipc_memory_resource):
if not supports_ipc_mempool(ipc_device):
import pytest

pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
"""
Demonstrate that a memory pool allocation handle can be reused for IPC
with multiple processes. Uses buffer descriptors.
Expand Down
4 changes: 4 additions & 0 deletions cuda_core/tests/memory_ipc/test_send_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from cuda.core.experimental import DeviceMemoryResource, DeviceMemoryResourceOptions
from utility import IPCBufferTestHelper

from cuda_python_test_helpers import supports_ipc_mempool

CHILD_TIMEOUT_SEC = 20
NBYTES = 64
NMRS = 3
Expand All @@ -18,6 +20,8 @@
@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_ipc_send_buffers(ipc_device, nmrs):
"""Test passing buffers sourced from multiple memory resources."""
if not supports_ipc_mempool(ipc_device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
# Set up several IPC-enabled memory pools.
device = ipc_device
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
Expand Down
8 changes: 8 additions & 0 deletions cuda_core/tests/memory_ipc/test_workerpool.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from cuda.core.experimental import Buffer, Device, DeviceMemoryResource, DeviceMemoryResourceOptions
from utility import IPCBufferTestHelper

from cuda_python_test_helpers import supports_ipc_mempool

CHILD_TIMEOUT_SEC = 20
NBYTES = 64
NWORKERS = 2
Expand All @@ -28,6 +30,8 @@ class TestIpcWorkerPool:

@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
if not supports_ipc_mempool(ipc_device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
device = ipc_device
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
mrs = [DeviceMemoryResource(device, options=options) for _ in range(nmrs)]
Expand Down Expand Up @@ -60,6 +64,8 @@ def init_worker(mrs):

@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
if not supports_ipc_mempool(ipc_device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
device = ipc_device
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
mrs = [DeviceMemoryResource(device, options=options) for _ in range(nmrs)]
Expand Down Expand Up @@ -100,6 +106,8 @@ def init_worker(mrs):

@pytest.mark.parametrize("nmrs", (1, NMRS))
def test_main(self, ipc_device, nmrs):
if not supports_ipc_mempool(ipc_device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")
device = ipc_device
options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=True)
mrs = [DeviceMemoryResource(device, options=options) for _ in range(nmrs)]
Expand Down
9 changes: 2 additions & 7 deletions cuda_core/tests/test_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
# SPDX-License-Identifier: Apache-2.0

import os
import pathlib
import platform
import time

import cuda.core.experimental
Expand All @@ -22,10 +20,7 @@
)

from conftest import skipif_need_cuda_headers


def platform_is_wsl():
return platform.system() == "Linux" and "microsoft" in pathlib.Path("/proc/version").read_text().lower()
from cuda_python_test_helpers import IS_WSL


def test_event_init_disabled():
Expand All @@ -47,7 +42,7 @@ def test_timing_success(init_cuda):
# We only want to exercise the __sub__ method, this test is not meant
# to stress-test the CUDA driver or time.sleep().
delay_ms = delay_seconds * 1000
if os.name == "nt" or platform_is_wsl(): # noqa: SIM108
if os.name == "nt" or IS_WSL: # noqa: SIM108
Comment thread
rparolin marked this conversation as resolved.
# For Python <=3.10, the Windows timer resolution is typically limited to 15.6 ms by default.
generous_tolerance = 100
else:
Expand Down
9 changes: 9 additions & 0 deletions cuda_core/tests/test_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
from cuda.core.experimental._utils.cuda_utils import handle_return
from cuda.core.experimental.utils import StridedMemoryView

from cuda_python_test_helpers import supports_ipc_mempool

POOL_SIZE = 2097152 # 2MB size


Expand Down Expand Up @@ -529,6 +531,9 @@ def test_mempool_attributes(ipc_enabled, mempool_device, property_name, expected
if platform.system() == "Windows":
Comment thread
leofang marked this conversation as resolved.
return # IPC not implemented for Windows

if ipc_enabled and not supports_ipc_mempool(device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")

options = DeviceMemoryResourceOptions(max_size=POOL_SIZE, ipc_enabled=ipc_enabled)
mr = DeviceMemoryResource(device, options=options)
assert mr.is_ipc_enabled == ipc_enabled
Expand Down Expand Up @@ -567,6 +572,10 @@ def test_mempool_attributes(ipc_enabled, mempool_device, property_name, expected
def test_mempool_attributes_ownership(mempool_device):
"""Ensure the attributes bundle handles references correctly."""
device = mempool_device
# Skip if IPC mempool is not supported on this platform/device
if not supports_ipc_mempool(device):
pytest.skip("Driver rejects IPC-enabled mempool creation on this platform")

mr = DeviceMemoryResource(device, dict(max_size=POOL_SIZE))
attributes = mr.attributes
mr.close()
Expand Down
63 changes: 63 additions & 0 deletions cuda_python_test_helpers/__init__.py

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we please not create yet another folder at the repo root? If we are hacking sys.path to load this module anyway, how about we move it to toolshed/?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's avoid path hacking. It really is possible to use the tools as-is, without resorting to hacks on the first attempt.

Just move the WSL detection into conftest.py and avoid the path hacking, the unnecessary directory, and the unnecessary module.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think path hacking is necessary and can be justified in this case. We are defining common test utilities that can be reused in both cuda.core and cuda.bindings (and potentially cuda.pathfinder) test suites. Without hacking the path we'd end up copying/pasting the same code around.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, moving to toolshed is a mistake. No one will ever find it there. For shared test utilities a distinct package is the right place.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's exactly why this folder exists. It's holding common checks so we don't have to copy/paste variants of them in every module.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a red flag that indicates that we've structured our code in the wrong way.

Complete agreement for production code.

In pytest code, I've seen this many times.

Unfortunately pytest is missing a facility for making this nicer. I don't think duplicating our test code is a healthy response. Of course, one little thing doesn't matter, but having such a hindrance tends to push tired or rushed developers (and who isn't one) into non-ideal compromises, although they may not be as obvious as a sys.path.insert.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given these helpers are using cuda.core, we probably shouldn't use them in cuda.bindings tests regardless to avoid a circular dependency.

I think the helpers are written by only calling cuda.bindings functions, therefore usable in both binding and core tests?

I do appreciate having a common module we can reuse. In the future I'd like to move some of the NVRTC/NVVM/PTX code snippets to this new module so that we only hard-code them in one place and reuse as much as possible.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree having common helpers is ideal, but we shouldn't need to hack sys.path in order to accomplish this. As @cpcloud said in #1045 (comment), the correct solution here would be to put the common helpers into a distinct package and then depend on that package instead. Obviously we don't want to actually ship and publish such a package, but for pure python code only to be used locally as part of testing, the pyproject.toml would be <10 lines of code and in cuda.core / cuda.bindings could be included under the testing dependency groups quite easily.

@kkraus14 kkraus14 Oct 3, 2025

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Additionally, some of the functionality being built here is already being duplicated here: NVIDIA/numba-cuda#488

So maybe we do want to ship some of these as something like testing utilities? 😅

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having a package with our test utils sounds great to me.

Would you want to publish that to PyPI, too?

If not, I'm not sure how easy or tricky it'll be for numba-cuda to integrate the package into the dev workflow, but it still seems like a good goal. @cpcloud have you done something like that before?

Comment thread
rparolin marked this conversation as resolved.
Outdated
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import functools
import os
from contextlib import suppress
from typing import Union

import pytest
from cuda.core.experimental._utils.cuda_utils import handle_return


def _detect_wsl() -> bool:
    """Return True when the process appears to be running under WSL.

    Two independent signals are checked; either one is sufficient:

    * the text of ``/proc/sys/kernel/osrelease`` contains ``microsoft`` or
      ``wsl`` (compared case-insensitively);
    * ``WSL_DISTRO_NAME`` or ``WSL_INTEROP`` is set to a non-empty value in
      the environment.

    The probe is best-effort: if the file cannot be opened or decoded it is
    treated as providing no signal rather than raising.
    """
    release = ""
    # Only I/O and decoding failures are expected while reading the file;
    # anything else would be a programming error and should not be hidden.
    with suppress(OSError, UnicodeError), open("/proc/sys/kernel/osrelease", encoding="utf-8") as f:
        release = f.read().lower()
    if "microsoft" in release or "wsl" in release:
        return True
    # Fall back to the environment variables; empty values do not count.
    return any(os.environ.get(name) for name in ("WSL_DISTRO_NAME", "WSL_INTEROP"))


# Computed once, when this module is imported, by calling _detect_wsl().
IS_WSL: bool = _detect_wsl()


Comment thread
rparolin marked this conversation as resolved.
Outdated
@functools.cache
def supports_ipc_mempool(device_id: Union[int, object]) -> bool:
    """Return True if mempool IPC via POSIX file descriptor is supported.

    Uses cuDeviceGetAttribute(CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
    to check for CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR support. Does not
    require an active CUDA context.

    Results are memoized per argument by ``functools.cache``, so the
    argument must be hashable.

    Args:
        device_id: Either an integer device ordinal or an object exposing a
            ``device_id`` attribute (for example a ``Device`` instance).

    Returns:
        ``False`` when WSL is detected or when the query fails for any
        reason (bindings not importable, attribute missing, driver error);
        otherwise whether the POSIX FD bit is set in the reported mask.
    """
    if _detect_wsl():
        return False

    try:
        # Lazy import to avoid hard dependency when not running GPU tests
        try:
            from cuda.bindings import driver  # type: ignore
        except ImportError:
            # Fall back to the ``cuda.cuda`` module when ``cuda.bindings``
            # is not importable.
            from cuda import cuda as driver  # type: ignore

        # Initialize CUDA
        handle_return(driver.cuInit(0))

        # Resolve device id from int or Device-like object
        dev_id = int(getattr(device_id, "device_id", device_id))

        # Query supported mempool handle types bitmask
        attr = driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES
        mask = handle_return(driver.cuDeviceGetAttribute(attr, dev_id))

        # Check POSIX FD handle type support via bitmask
        posix_fd = driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
        return (int(mask) & int(posix_fd)) != 0
    except Exception:
        # Deliberately broad: this is a capability probe used to decide
        # whether to skip tests, so any failure means "not supported".
        return False


# Names exported by ``from <this module> import *``.
__all__ = [
    "IS_WSL",
    "supports_ipc_mempool",
]
62 changes: 62 additions & 0 deletions cuda_python_test_helpers/cuda_python_test_helpers/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import functools
import os
from contextlib import suppress
from typing import Union

from cuda.core.experimental._utils.cuda_utils import handle_return


def _detect_wsl() -> bool:
    """Return True when the process appears to be running under WSL.

    Two independent signals are checked; either one is sufficient:

    * the text of ``/proc/sys/kernel/osrelease`` contains ``microsoft`` or
      ``wsl`` (compared case-insensitively);
    * ``WSL_DISTRO_NAME`` or ``WSL_INTEROP`` is set to a non-empty value in
      the environment.

    The probe is best-effort: if the file cannot be opened or decoded it is
    treated as providing no signal rather than raising.
    """
    release = ""
    # Only I/O and decoding failures are expected while reading the file;
    # anything else would be a programming error and should not be hidden.
    with suppress(OSError, UnicodeError), open("/proc/sys/kernel/osrelease", encoding="utf-8") as f:
        release = f.read().lower()
    if "microsoft" in release or "wsl" in release:
        return True
    # Fall back to the environment variables; empty values do not count.
    return any(os.environ.get(name) for name in ("WSL_DISTRO_NAME", "WSL_INTEROP"))


# Computed once, when this module is imported, by calling _detect_wsl().
IS_WSL: bool = _detect_wsl()


@functools.cache
def supports_ipc_mempool(device_id: Union[int, object]) -> bool:
    """Return True if mempool IPC via POSIX file descriptor is supported.

    Uses cuDeviceGetAttribute(CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES)
    to check for CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR support. Does not
    require an active CUDA context.

    Results are memoized per argument by ``functools.cache``, so the
    argument must be hashable.

    Args:
        device_id: Either an integer device ordinal or an object exposing a
            ``device_id`` attribute (for example a ``Device`` instance).

    Returns:
        ``False`` when WSL is detected or when the query fails for any
        reason (bindings not importable, attribute missing, driver error);
        otherwise whether the POSIX FD bit is set in the reported mask.
    """
    if _detect_wsl():
        return False

    try:
        # Lazy import to avoid hard dependency when not running GPU tests
        try:
            from cuda.bindings import driver  # type: ignore
        except ImportError:
            # Fall back to the ``cuda.cuda`` module when ``cuda.bindings``
            # is not importable.
            from cuda import cuda as driver  # type: ignore

        # Initialize CUDA
        handle_return(driver.cuInit(0))

        # Resolve device id from int or Device-like object
        dev_id = int(getattr(device_id, "device_id", device_id))

        # Query supported mempool handle types bitmask
        attr = driver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES
        mask = handle_return(driver.cuDeviceGetAttribute(attr, dev_id))

        # Check POSIX FD handle type support via bitmask
        posix_fd = driver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
        return (int(mask) & int(posix_fd)) != 0
    except Exception:
        # Deliberately broad: this is a capability probe used to decide
        # whether to skip tests, so any failure means "not supported".
        return False


# Names exported by ``from <this module> import *``.
__all__ = [
    "IS_WSL",
    "supports_ipc_mempool",
]
25 changes: 25 additions & 0 deletions cuda_python_test_helpers/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

[build-system]
requires = ["setuptools>=77.0.0"]
build-backend = "setuptools.build_meta"

[project]
name = "cuda-python-test-helpers"
version = "0.1.0"
description = "Shared test helpers for CUDA Python projects"
readme = {file = "README.md", content-type = "text/markdown"}
authors = [{ name = "NVIDIA Corporation" }]
license = "Apache-2.0"
requires-python = ">=3.9"
classifiers = [
"Programming Language :: Python :: 3 :: Only",
"Operating System :: POSIX :: Linux",
]

[tool.setuptools]
packages = ["cuda_python_test_helpers"]

[project.urls]
repository = "https://github.com/NVIDIA/cuda-python"