[BUG] cudaErrorUnsupportedPtxVersion with cuCIM+CuPy on CUDA 11.5

**Describe the bug**

*From @quasiben.*

I'm seeing some poor behavior with the latest cuCIM when used together with CuPy and CEC (CUDA Enhanced Compatibility):

```
In [1]: import cupy as cp

In [2]: a = cp.zeros((3, 3))

In [3]: import cucim

In [4]: a = cp.zeros((3, 3))
---------------------------------------------------------------------------
CUDARuntimeError                          Traceback (most recent call last)
<ipython-input-4-bea1f486f5af> in <module>
----> 1 a = cp.zeros((3, 3))

/datasets/bzaitlen/miniconda3/envs/cucim-2021-11-30/lib/python3.8/site-packages/cupy/_creation/basic.py in zeros(shape, dtype, order)
    207
    208     """
--> 209     a = cupy.ndarray(shape, dtype, order=order)
    210     a.data.memset_async(0, a.nbytes)
    211     return a

cupy/_core/core.pyx in cupy._core.core.ndarray.__init__()

cupy/cuda/memory.pyx in cupy.cuda.memory.alloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.MemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.MemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._try_malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._try_malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._alloc()

cupy/cuda/memory.pyx in cupy.cuda.memory._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.Memory.__init__()

cupy_backends/cuda/api/runtime.pyx in cupy_backends.cuda.api.runtime.malloc()

cupy_backends/cuda/api/runtime.pyx in cupy_backends.cuda.api.runtime.check_status()

CUDARuntimeError: cudaErrorUnsupportedPtxVersion: the provided PTX was compiled with an unsupported toolchain.
```
**Steps/Code to reproduce bug**

On Ampere-architecture GPUs (such as the GeForce RTX 3090), run:

```bash
mamba create -n test-cucim -c rapidsai -c conda-forge cucim cudatoolkit=11.2 cupy=9.6
conda activate test-cucim

python
>>> import cupy as cp
>>> import cucim.clara
>>> a = cp.zeros((3,3))
```

**Expected behavior**

No errors

**Environment details (please complete the following information):**
 - Environment location: [Bare-metal]
 - Method of cuCIM install: [conda]

**Additional context**


`CMAKE_CUDA_ARCHITECTURES` is set by `cucim_define_cuda_architectures` in the following files:
- https://github.com/rapidsai/cucim/blob/d6d3af5695451971167f64fe30174ab1a231a6b1/cpp/plugins/cucim.kit.cumed/cmake/modules/CuCIMUtils.cmake#L41-L60
- https://github.com/rapidsai/cucim/blob/d6d3af5695451971167f64fe30174ab1a231a6b1/cpp/plugins/cucim.kit.cuslide/cmake/modules/CuCIMUtils.cmake#L41-L60
- https://github.com/rapidsai/cucim/blob/d6d3af5695451971167f64fe30174ab1a231a6b1/cpp/cmake/modules/CuCIMUtils.cmake#L41-L60
- https://github.com/rapidsai/cucim/blob/d6d3af5695451971167f64fe30174ab1a231a6b1/python/cmake/modules/CuCIMUtils.cmake#L41-L60

The error is related to the use of `nvcc` when no CUDA kernel exists in the code.

```cmake
# nvcc must build at least one of the sources; when none is compiled with it,
# linking fails with the `/usr/bin/ld: cannot find -lcudart` error message.
set_source_files_properties(
  src/cucim.cpp
  src/filesystem/cufile_driver.cpp
  PROPERTIES
    LANGUAGE CUDA
)
```

	# Define CMAKE_CUDA_ARCHITECTURES (in the caller's scope) for the given
	# architecture values.
	#
	# Every entry of the list is emitted as '<arch>-real' (SASS). The last entry
	# is additionally emitted exactly once as '<arch>-virtual' (PTX) for
	# forward-compatibility.
	#
	# Params:
	#   arch_list - architecture value list (e.g., '60;70;75;80;86'). Only this
	#               first argument is read, so pass the list as one quoted
	#               argument. It must not be empty.
	#
	# Example:
	#   cucim_define_cuda_architectures("60;70;86")
	#   # CMAKE_CUDA_ARCHITECTURES == 60-real;70-real;86-real;86-virtual
	if(NOT COMMAND cucim_define_cuda_architectures)
	  function(cucim_define_cuda_architectures arch_list)
	    # Fail with a clear message instead of the opaque error list(GET) raises
	    # on an empty list.
	    if("${arch_list}" STREQUAL "")
	      message(FATAL_ERROR
	        "cucim_define_cuda_architectures: arch_list must not be empty")
	    endif()

	    # Create SASS for all architectures in the list.
	    set(arch_string "")
	    foreach(arch IN LISTS arch_list)
	      list(APPEND arch_string "${arch}-real")
	    endforeach()

	    # Create PTX for the latest architecture only. It is appended once; the
	    # previous loop-based version repeated this entry for every list item.
	    list(GET arch_list -1 latest_arch)
	    list(APPEND arch_string "${latest_arch}-virtual")

	    set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
	  endfunction()
	endif()

	# Define CMAKE_CUDA_ARCHITECTURES (in the caller's scope) for the given
	# architecture values.
	#
	# Every entry of the list is emitted as '<arch>-real' (SASS). The last entry
	# is additionally emitted exactly once as '<arch>-virtual' (PTX) for
	# forward-compatibility.
	#
	# Params:
	#   arch_list - architecture value list (e.g., '60;70;75;80;86'). Only this
	#               first argument is read, so pass the list as one quoted
	#               argument. It must not be empty.
	#
	# Example:
	#   cucim_define_cuda_architectures("60;70;86")
	#   # CMAKE_CUDA_ARCHITECTURES == 60-real;70-real;86-real;86-virtual
	if(NOT COMMAND cucim_define_cuda_architectures)
	  function(cucim_define_cuda_architectures arch_list)
	    # Fail with a clear message instead of the opaque error list(GET) raises
	    # on an empty list.
	    if("${arch_list}" STREQUAL "")
	      message(FATAL_ERROR
	        "cucim_define_cuda_architectures: arch_list must not be empty")
	    endif()

	    # Create SASS for all architectures in the list.
	    set(arch_string "")
	    foreach(arch IN LISTS arch_list)
	      list(APPEND arch_string "${arch}-real")
	    endforeach()

	    # Create PTX for the latest architecture only. It is appended once; the
	    # previous loop-based version repeated this entry for every list item.
	    list(GET arch_list -1 latest_arch)
	    list(APPEND arch_string "${latest_arch}-virtual")

	    set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
	  endfunction()
	endif()

	# Define CMAKE_CUDA_ARCHITECTURES (in the caller's scope) for the given
	# architecture values.
	#
	# Every entry of the list is emitted as '<arch>-real' (SASS). The last entry
	# is additionally emitted exactly once as '<arch>-virtual' (PTX) for
	# forward-compatibility.
	#
	# Params:
	#   arch_list - architecture value list (e.g., '60;70;75;80;86'). Only this
	#               first argument is read, so pass the list as one quoted
	#               argument. It must not be empty.
	#
	# Example:
	#   cucim_define_cuda_architectures("60;70;86")
	#   # CMAKE_CUDA_ARCHITECTURES == 60-real;70-real;86-real;86-virtual
	if(NOT COMMAND cucim_define_cuda_architectures)
	  function(cucim_define_cuda_architectures arch_list)
	    # Fail with a clear message instead of the opaque error list(GET) raises
	    # on an empty list.
	    if("${arch_list}" STREQUAL "")
	      message(FATAL_ERROR
	        "cucim_define_cuda_architectures: arch_list must not be empty")
	    endif()

	    # Create SASS for all architectures in the list.
	    set(arch_string "")
	    foreach(arch IN LISTS arch_list)
	      list(APPEND arch_string "${arch}-real")
	    endforeach()

	    # Create PTX for the latest architecture only. It is appended once; the
	    # previous loop-based version repeated this entry for every list item.
	    list(GET arch_list -1 latest_arch)
	    list(APPEND arch_string "${latest_arch}-virtual")

	    set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
	  endfunction()
	endif()

	# Define CMAKE_CUDA_ARCHITECTURES (in the caller's scope) for the given
	# architecture values.
	#
	# Every entry of the list is emitted as '<arch>-real' (SASS). The last entry
	# is additionally emitted exactly once as '<arch>-virtual' (PTX) for
	# forward-compatibility.
	#
	# Params:
	#   arch_list - architecture value list (e.g., '60;70;75;80;86'). Only this
	#               first argument is read, so pass the list as one quoted
	#               argument. It must not be empty.
	#
	# Example:
	#   cucim_define_cuda_architectures("60;70;86")
	#   # CMAKE_CUDA_ARCHITECTURES == 60-real;70-real;86-real;86-virtual
	if(NOT COMMAND cucim_define_cuda_architectures)
	  function(cucim_define_cuda_architectures arch_list)
	    # Fail with a clear message instead of the opaque error list(GET) raises
	    # on an empty list.
	    if("${arch_list}" STREQUAL "")
	      message(FATAL_ERROR
	        "cucim_define_cuda_architectures: arch_list must not be empty")
	    endif()

	    # Create SASS for all architectures in the list.
	    set(arch_string "")
	    foreach(arch IN LISTS arch_list)
	      list(APPEND arch_string "${arch}-real")
	    endforeach()

	    # Create PTX for the latest architecture only. It is appended once; the
	    # previous loop-based version repeated this entry for every list item.
	    list(GET arch_list -1 latest_arch)
	    list(APPEND arch_string "${latest_arch}-virtual")

	    set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
	  endfunction()
	endif()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[BUG] cudaErrorUnsupportedPtxVersion with cuCIM+CuPy on CUDA 11.5 #170

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[BUG] cudaErrorUnsupportedPtxVersion with cuCIM+CuPy on CUDA 11.5 #170

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions