diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
index c5073a7bb..2dd4cb276 100644
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
@@ -29,13 +29,7 @@
 import re
 from itertools import product
 from abc import ABCMeta, abstractmethod
-from ctypes import (
-    c_int,
-    byref,
-    c_size_t,
-    c_void_p,
-    c_uint8,
-)
+from ctypes import c_int, byref, c_void_p, c_uint8
 import contextlib
 import importlib
 import numpy as np
@@ -47,7 +41,7 @@
 from numba.cuda import utils, serialize
 from .error import CudaSupportError, CudaDriverError
 from .drvapi import API_PROTOTYPES
-from .drvapi import cu_occupancy_b2d_size, cu_stream_callback_pyobj
+from .drvapi import cu_stream_callback_pyobj
 from .mappings import FILE_EXTENSION_MAP
 from .linkable_code import LinkableCode, LTOIR, Fatbin, Object
 from numba.cuda.utils import cached_file_read
@@ -1179,45 +1173,10 @@ def get_max_potential_block_size(
         :param blocksizelimit: maximum block size the kernel is designed
                                to handle
         """
-        args = (func, b2d_func, memsize, blocksizelimit, flags)
-        return self._cuda_python_max_potential_block_size(*args)
-
-    def _ctypes_max_potential_block_size(
-        self, func, b2d_func, memsize, blocksizelimit, flags
-    ):
-        gridsize = c_int()
-        blocksize = c_int()
-        b2d_cb = cu_occupancy_b2d_size(b2d_func)
-        args = [
-            byref(gridsize),
-            byref(blocksize),
-            func.handle,
-            b2d_cb,
-            memsize,
-            blocksizelimit,
-        ]
-
-        if not flags:
-            driver.cuOccupancyMaxPotentialBlockSize(*args)
-        else:
-            args.append(flags)
-            driver.cuOccupancyMaxPotentialBlockSizeWithFlags(*args)
-
-        return (gridsize.value, blocksize.value)
-
-    def _cuda_python_max_potential_block_size(
-        self, func, b2d_func, memsize, blocksizelimit, flags
-    ):
-        b2d_cb = ctypes.CFUNCTYPE(c_size_t, c_int)(b2d_func)
-        ptr = int.from_bytes(b2d_cb, byteorder="little")
-        driver_b2d_cb = binding.CUoccupancyB2DSize(ptr)
-        args = [func.handle, driver_b2d_cb, memsize, blocksizelimit]
-
-        if not flags:
-            return driver.cuOccupancyMaxPotentialBlockSize(*args)
-        else:
-            args.append(flags)
-            return driver.cuOccupancyMaxPotentialBlockSizeWithFlags(*args)
+        return (
+            binding.CUresult.CUDA_SUCCESS,
+            func.kernel.attributes.max_threads_per_block(),
+        )
 
     def prepare_for_use(self):
         """Initialize the context for use.
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
index 5774784fd..be5aa6796 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_cuda_driver.py
@@ -263,15 +263,6 @@ def test_cuda_driver_occupancy(self):
         )
         self.assertTrue(value > 0)
-        def b2d(bs):
-            return bs
-
-        grid, block = self.context.get_max_potential_block_size(
-            function, b2d, 128, 128
-        )
-        self.assertTrue(grid > 0)
-        self.assertTrue(block > 0)
-
 
     def test_cuda_cache_config(self):
         from numba import types
         import numpy as np