diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index ce3f4e626..5bd6419e6 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -39,6 +39,7 @@
     c_void_p,
     c_float,
     c_uint,
+    memset,
 )
 import contextlib
 import importlib
@@ -3439,6 +3440,8 @@ def device_memset(dst, val, size, stream=0):
     size: number of byte to be written
     stream: a CUDA stream
     """
+    ptr = device_pointer(dst)
+
     varargs = []
 
     if stream:
@@ -3452,7 +3455,26 @@ def device_memset(dst, val, size, stream=0):
     else:
         fn = driver.cuMemsetD8
 
-    fn(device_pointer(dst), val, size, *varargs)
+    try:
+        fn(ptr, val, size, *varargs)
+    except CudaAPIError as e:
+        invalid = (
+            binding.CUresult.CUDA_ERROR_INVALID_VALUE
+            if USE_NV_BINDING
+            else enums.CUDA_ERROR_INVALID_VALUE
+        )
+        if (
+            e.code == invalid
+            and getattr(dst, "__cuda_memory__", False)
+            and getattr(dst, "is_managed", False)
+        ):
+            # The driver rejected the memset of a managed allocation, so
+            # fill it through its host pointer instead. memset() writes the
+            # bytes natively rather than building a size-element list.
+            # NOTE(review): this host-side write is not ordered on `stream`.
+            memset(host_pointer(dst), val & 0xFF, size)
+            return
+        raise
 
 
 def profile_start():
diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index 9a7502280..8ae8027c4 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -242,6 +242,18 @@ def skip_on_arm(reason):
     return unittest.skipIf(is_arm, reason)
 
 
+def skip_on_wsl2(reason):
+    """Skip test when running under WSL2.
+
+    Detection is based on the kernel release string, which typically contains
+    "microsoft-standard-WSL2" on WSL2 systems.
+    """
+    # A case-insensitive search for "wsl2" also matches the longer
+    # "microsoft-standard-WSL2" form, so a single test suffices.
+    is_wsl2 = "wsl2" in platform.release().lower()
+    return unittest.skipIf(is_wsl2, reason)
+
+
 def skip_if_cuda_includes_missing(fn):
     # Skip when cuda.h is not available - generally this should indicate
     # whether the CUDA includes are available or not
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
index 4839042c5..2011664a3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ipc.py
@@ -14,6 +14,7 @@
     skip_on_arm,
     skip_on_cudasim,
     skip_under_cuda_memcheck,
+    skip_on_wsl2,
     ContextResettingTestCase,
     ForeignArray,
 )
@@ -93,6 +94,7 @@ def ipc_array_test(ipcarr, result_queue):
 @skip_under_cuda_memcheck("Hangs cuda-memcheck")
 @skip_on_cudasim("Ipc not available in CUDASIM")
 @skip_on_arm("CUDA IPC not supported on ARM in Numba")
+@skip_on_wsl2("CUDA IPC unreliable on WSL2; skipping IPC tests")
 class TestIpcMemory(ContextResettingTestCase):
     def test_ipc_handle(self):
         # prepare data for IPC
@@ -261,6 +263,7 @@ def staged_ipc_array_test(ipcarr, device_num, result_queue):
 @skip_under_cuda_memcheck("Hangs cuda-memcheck")
 @skip_on_cudasim("Ipc not available in CUDASIM")
 @skip_on_arm("CUDA IPC not supported on ARM in Numba")
+@skip_on_wsl2("CUDA IPC unreliable on WSL2; skipping IPC tests")
 class TestIpcStaged(ContextResettingTestCase):
     def test_staged(self):
         # prepare data for IPC