Skip to content

[BUG] cudaErrorUnsupportedPtxVersion with cuCIM+CuPy on CUDA 11.5 #170

@gigony

Description

@gigony

Describe the bug

From @quasiben.

I'm seeing some poor behavior of the latest cucim with cupy and CEC (CUDA Enhanced Compatibility):

In [1]: import cupy as cp

In [2]: a = cp.zeros((3, 3))

In [3]: import cucim

In [4]: a = cp.zeros((3, 3))
---------------------------------------------------------------------------
CUDARuntimeError                          Traceback (most recent call last)
<ipython-input-4-bea1f486f5af> in <module>
----> 1 a = cp.zeros((3, 3))

/datasets/bzaitlen/miniconda3/envs/cucim-2021-11-30/lib/python3.8/site-packages/cupy/_creation/basic.py in zeros(shape, dtype, order)
    207
    208     """
--> 209     a = cupy.ndarray(shape, dtype, order=order)
    210     a.data.memset_async(0, a.nbytes)
    211     return a

cupy/_core/core.pyx in cupy._core.core.ndarray.__init__()

cupy/cuda/memory.pyx in cupy.cuda.memory.alloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.MemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.MemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool.malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._try_malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._try_malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.SingleDeviceMemoryPool._alloc()

cupy/cuda/memory.pyx in cupy.cuda.memory._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory._malloc()

cupy/cuda/memory.pyx in cupy.cuda.memory.Memory.__init__()

cupy_backends/cuda/api/runtime.pyx in cupy_backends.cuda.api.runtime.malloc()

cupy_backends/cuda/api/runtime.pyx in cupy_backends.cuda.api.runtime.check_status()

CUDARuntimeError: cudaErrorUnsupportedPtxVersion: the provided PTX was compiled with an unsupported toolchain.

Steps/Code to reproduce bug

On Ampere-architecture GPUs (such as the GeForce RTX 3090):

mamba create -n test-cucim -c rapidsai -c conda-forge cucim cudatoolkit=11.2 cupy=9.6
conda activate test-cucim

python
>>> import cupy as cp
>>> import cucim.clara
>>> a = cp.zeros((3,3))

Expected behavior

No errors

Environment details (please complete the following information):

  • Environment location: [Bare-metal]
  • Method of cuCIM install: [conda]

Additional context

CMAKE_CUDA_ARCHITECTURES

  • # Define CMAKE_CUDA_ARCHITECTURES for the given architecture values
    #
    # Params:
    # arch_list - architecture value list (e.g., '60;70;75;80;86')
    if(NOT COMMAND cucim_define_cuda_architectures)
        # Set CMAKE_CUDA_ARCHITECTURES in the caller's scope from `arch_list`.
        #
        # Every entry of the list contributes one `<arch>-real` item (SASS),
        # and the last entry of the list contributes exactly one
        # `<arch>-virtual` item (PTX) for forward-compatibility.
        #
        # Params:
        #   arch_list - architecture value list (e.g., '60;70;75;80;86');
        #               the last element is treated as the latest architecture.
        function(cucim_define_cuda_architectures arch_list)
            # `list(GET ... -1 ...)` below cannot work on an empty list, so
            # fail early with a message that names this function.
            if("${arch_list}" STREQUAL "")
                message(FATAL_ERROR "cucim_define_cuda_architectures: arch_list must not be empty")
            endif()

            # Create SASS for all architectures in the list
            set(arch_string "")
            foreach(arch IN LISTS arch_list)
                list(APPEND arch_string "${arch}-real")
            endforeach()

            # Create PTX for the latest architecture for forward-compatibility.
            # Appended once only: repeating it per list element just produces
            # duplicate `-virtual` entries.
            list(GET arch_list -1 latest_arch)
            list(APPEND arch_string "${latest_arch}-virtual")

            set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
        endfunction()
    endif()
  • # Define CMAKE_CUDA_ARCHITECTURES for the given architecture values
    #
    # Params:
    # arch_list - architecture value list (e.g., '60;70;75;80;86')
    if(NOT COMMAND cucim_define_cuda_architectures)
        # Set CMAKE_CUDA_ARCHITECTURES in the caller's scope from `arch_list`.
        #
        # Every entry of the list contributes one `<arch>-real` item (SASS),
        # and the last entry of the list contributes exactly one
        # `<arch>-virtual` item (PTX) for forward-compatibility.
        #
        # Params:
        #   arch_list - architecture value list (e.g., '60;70;75;80;86');
        #               the last element is treated as the latest architecture.
        function(cucim_define_cuda_architectures arch_list)
            # `list(GET ... -1 ...)` below cannot work on an empty list, so
            # fail early with a message that names this function.
            if("${arch_list}" STREQUAL "")
                message(FATAL_ERROR "cucim_define_cuda_architectures: arch_list must not be empty")
            endif()

            # Create SASS for all architectures in the list
            set(arch_string "")
            foreach(arch IN LISTS arch_list)
                list(APPEND arch_string "${arch}-real")
            endforeach()

            # Create PTX for the latest architecture for forward-compatibility.
            # Appended once only: repeating it per list element just produces
            # duplicate `-virtual` entries.
            list(GET arch_list -1 latest_arch)
            list(APPEND arch_string "${latest_arch}-virtual")

            set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
        endfunction()
    endif()
  • # Define CMAKE_CUDA_ARCHITECTURES for the given architecture values
    #
    # Params:
    # arch_list - architecture value list (e.g., '60;70;75;80;86')
    if(NOT COMMAND cucim_define_cuda_architectures)
        # Set CMAKE_CUDA_ARCHITECTURES in the caller's scope from `arch_list`.
        #
        # Every entry of the list contributes one `<arch>-real` item (SASS),
        # and the last entry of the list contributes exactly one
        # `<arch>-virtual` item (PTX) for forward-compatibility.
        #
        # Params:
        #   arch_list - architecture value list (e.g., '60;70;75;80;86');
        #               the last element is treated as the latest architecture.
        function(cucim_define_cuda_architectures arch_list)
            # `list(GET ... -1 ...)` below cannot work on an empty list, so
            # fail early with a message that names this function.
            if("${arch_list}" STREQUAL "")
                message(FATAL_ERROR "cucim_define_cuda_architectures: arch_list must not be empty")
            endif()

            # Create SASS for all architectures in the list
            set(arch_string "")
            foreach(arch IN LISTS arch_list)
                list(APPEND arch_string "${arch}-real")
            endforeach()

            # Create PTX for the latest architecture for forward-compatibility.
            # Appended once only: repeating it per list element just produces
            # duplicate `-virtual` entries.
            list(GET arch_list -1 latest_arch)
            list(APPEND arch_string "${latest_arch}-virtual")

            set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
        endfunction()
    endif()
  • # Define CMAKE_CUDA_ARCHITECTURES for the given architecture values
    #
    # Params:
    # arch_list - architecture value list (e.g., '60;70;75;80;86')
    if(NOT COMMAND cucim_define_cuda_architectures)
        # Set CMAKE_CUDA_ARCHITECTURES in the caller's scope from `arch_list`.
        #
        # Every entry of the list contributes one `<arch>-real` item (SASS),
        # and the last entry of the list contributes exactly one
        # `<arch>-virtual` item (PTX) for forward-compatibility.
        #
        # Params:
        #   arch_list - architecture value list (e.g., '60;70;75;80;86');
        #               the last element is treated as the latest architecture.
        function(cucim_define_cuda_architectures arch_list)
            # `list(GET ... -1 ...)` below cannot work on an empty list, so
            # fail early with a message that names this function.
            if("${arch_list}" STREQUAL "")
                message(FATAL_ERROR "cucim_define_cuda_architectures: arch_list must not be empty")
            endif()

            # Create SASS for all architectures in the list
            set(arch_string "")
            foreach(arch IN LISTS arch_list)
                list(APPEND arch_string "${arch}-real")
            endforeach()

            # Create PTX for the latest architecture for forward-compatibility.
            # Appended once only: repeating it per list element just produces
            # duplicate `-virtual` entries.
            list(GET arch_list -1 latest_arch)
            list(APPEND arch_string "${latest_arch}-virtual")

            set(CMAKE_CUDA_ARCHITECTURES "${arch_string}" PARENT_SCOPE)
        endfunction()
    endif()

The error is related to the use of nvcc when no CUDA kernel exists in the code.

# nvcc has to compile at least one source file; otherwise the link step
# fails with the `/usr/bin/ld: cannot find -lcudart` error message.
# The two sources below are therefore marked as CUDA-language files.
set_source_files_properties(
    src/cucim.cpp
    src/filesystem/cufile_driver.cpp
    PROPERTIES
        LANGUAGE CUDA
)

Metadata

Metadata

Assignees

Labels

bug: Something isn't working

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions